1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
52 */
53 bool ring_buffer_expanded;
54
55 /*
56 * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61 */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65 * If a tracer is running, we do not want to run SELFTEST.
66 */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 return 0;
83 }
84
85 /*
86 * To prevent the comm cache from being overwritten when no
87 * tracing is active, only save the comm when a trace event
88 * occurred.
89 */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93 * Kill all tracing for good (never come back).
94 * It is initialized to 1 but will turn to zero if the initialization
95 * of the tracer is successful. But that is the only place that sets
96 * this back to zero.
97 */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly tracing_buffer_mask;
101
102 /*
103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104 *
105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106 * is set, then ftrace_dump is called. This will output the contents
107 * of the ftrace buffers to the console. This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109 * serial console.
110 *
111  * It defaults to off, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113 * /proc/sys/kernel/ftrace_dump_on_oops
114 * Set 1 if you want to dump buffers of all CPUs
115 * Set 2 if you want to dump the buffer of the CPU that triggered oops
116 */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
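
/*
 * Illustrative usage (a sketch, not part of this file): on the kernel
 * command line, "ftrace_dump_on_oops" or "ftrace_dump_on_oops=orig_cpu",
 * or at run time via the sysctl mentioned above, e.g.
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */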
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126 struct module *mod;
127 unsigned long length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133 /*
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "enum_string"
136 */
137 union trace_enum_map_item *next;
138 const char *end; /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144 * The trace_enum_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved enum_map items.
149 */
150 union trace_enum_map_item {
151 struct trace_enum_map map;
152 struct trace_enum_map_head head;
153 struct trace_enum_map_tail tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE 100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 default_bootup_tracer = bootup_tracer_buf;
171 /* We are using ftrace early, expand it */
172 ring_buffer_expanded = true;
173 return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 if (*str++ != '=' || !*str) {
180 ftrace_dump_on_oops = DUMP_ALL;
181 return 1;
182 }
183
184 if (!strcmp("orig_cpu", str)) {
185 ftrace_dump_on_oops = DUMP_ORIG;
186 return 1;
187 }
188
189 return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 __disable_trace_on_warning = 1;
197 return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 allocate_snapshot = true;
204 /* We also need the main ring buffer expanded */
205 ring_buffer_expanded = true;
206 return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 trace_boot_clock = trace_boot_clock_buf;
227 return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 tracepoint_printk = 1;
235 return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 nsec += 500;
242 do_div(nsec, 1000);
243 return nsec;
244 }
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS \
248 (FUNCTION_DEFAULT_FLAGS | \
249 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
250 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
251 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
252 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
256 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 TRACE_ITER_EVENT_FORK
261
262 /*
263 * The global_trace is the descriptor that holds the top-level tracing
264 * buffers for the live tracing.
265 */
266 static struct trace_array global_trace = {
267 .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 struct trace_array *tr;
275 int ret = -ENODEV;
276
277 mutex_lock(&trace_types_lock);
278 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 if (tr == this_tr) {
280 tr->ref++;
281 ret = 0;
282 break;
283 }
284 }
285 mutex_unlock(&trace_types_lock);
286
287 return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 WARN_ON(!this_tr->ref);
293 this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 mutex_lock(&trace_types_lock);
299 __trace_array_put(this_tr);
300 mutex_unlock(&trace_types_lock);
301 }
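
/*
 * A minimal sketch of the get/put pairing above (illustrative, not part
 * of this file): hold a reference across any use of a trace_array that
 * might otherwise go away. trace_array_get() returns 0 on success.
 *
 *	if (trace_array_get(tr) == 0) {
 *		... use tr ...
 *		trace_array_put(tr);
 *	}
 */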
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 struct ring_buffer *buffer,
305 struct ring_buffer_event *event)
306 {
307 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 !filter_match_preds(call->filter, rec)) {
309 __trace_event_discard_commit(buffer, event);
310 return 1;
311 }
312
313 return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 vfree(pid_list->pids);
319 kfree(pid_list);
320 }
321
322 /**
323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324 * @filtered_pids: The list of pids to check
325 * @search_pid: The PID to find in @filtered_pids
326 *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328 */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 /*
333 * If pid_max changed after filtered_pids was created, we
334 * by default ignore all pids greater than the previous pid_max.
335 */
336 if (search_pid >= filtered_pids->pid_max)
337 return false;
338
339 return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343 * trace_ignore_this_task - should a task be ignored for tracing
344 * @filtered_pids: The list of pids to check
345 * @task: The task that should be ignored if not filtered
346 *
347 * Checks if @task should be traced or not from @filtered_pids.
348 * Returns true if @task should *NOT* be traced.
349 * Returns false if @task should be traced.
350 */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 /*
355 * Return false, because if filtered_pids does not exist,
356 * all pids are good to trace.
357 */
358 if (!filtered_pids)
359 return false;
360
361 return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove
369 *
370  * If adding a task and @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork, and tasks should
372 * only be added when the parent is listed. If @self is NULL, then the
373 * @task pid will be removed from the list, which would happen on exit
374 * of a task.
375 */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 struct task_struct *self,
378 struct task_struct *task)
379 {
380 if (!pid_list)
381 return;
382
383 /* For forks, we only add if the forking task is listed */
384 if (self) {
385 if (!trace_find_filtered_pid(pid_list, self->pid))
386 return;
387 }
388
389 /* Sorry, but we don't support pid_max changing after setting */
390 if (task->pid >= pid_list->pid_max)
391 return;
392
393 /* "self" is set for forks, and NULL for exits */
394 if (self)
395 set_bit(task->pid, pid_list->pids);
396 else
397 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402 * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
404 * @pos: The position of the file
405 *
406 * This is used by the seq_file "next" operation to iterate the pids
407 * listed in a trace_pid_list structure.
408 *
409 * Returns the pid+1 as we want to display pid of zero, but NULL would
410 * stop the iteration.
411 */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 unsigned long pid = (unsigned long)v;
415
416 (*pos)++;
417
418 	/* pid is already +1 of the actual previous bit */
419 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421 /* Return pid + 1 to allow zero to be represented */
422 if (pid < pid_list->pid_max)
423 return (void *)(pid + 1);
424
425 return NULL;
426 }
427
428 /**
429 * trace_pid_start - Used for seq_file to start reading pid lists
430 * @pid_list: The pid list to show
431 * @pos: The position of the file
432 *
433 * This is used by seq_file "start" operation to start the iteration
434 * of listing pids.
435 *
436 * Returns the pid+1 as we want to display pid of zero, but NULL would
437 * stop the iteration.
438 */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 unsigned long pid;
442 loff_t l = 0;
443
444 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 if (pid >= pid_list->pid_max)
446 return NULL;
447
448 /* Return pid + 1 so that zero can be the exit value */
449 for (pid++; pid && l < *pos;
450 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 ;
452 return (void *)pid;
453 }
454
455 /**
456 * trace_pid_show - show the current pid in seq_file processing
457 * @m: The seq_file structure to write into
458 * @v: A void pointer of the pid (+1) value to display
459 *
460 * Can be directly used by seq_file operations to display the current
461 * pid value.
462 */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 unsigned long pid = (unsigned long)v - 1;
466
467 seq_printf(m, "%lu\n", pid);
468 return 0;
469 }
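
/*
 * A hedged sketch of wiring the three helpers above into seq_file
 * (illustrative; the example_* names are hypothetical, and real users
 * wrap start/next to look up their own pid_list):
 *
 *	static void *example_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pid_list, pos);
 *	}
 *
 *	static void *example_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations example_seq_ops = {
 *		.start	= example_start,
 *		.next	= example_next,
 *		.stop	= example_stop,
 *		.show	= trace_pid_show,
 *	};
 */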
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE 127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 struct trace_pid_list **new_pid_list,
476 const char __user *ubuf, size_t cnt)
477 {
478 struct trace_pid_list *pid_list;
479 struct trace_parser parser;
480 unsigned long val;
481 int nr_pids = 0;
482 ssize_t read = 0;
483 ssize_t ret = 0;
484 loff_t pos;
485 pid_t pid;
486
487 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 return -ENOMEM;
489
490 /*
491 	 * Always create a new array. The write is an all-or-nothing
492 	 * operation: a new array is created whenever the user adds new
493 	 * pids. If the operation fails, then the current list is
494 * not modified.
495 */
496 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 if (!pid_list)
498 return -ENOMEM;
499
500 pid_list->pid_max = READ_ONCE(pid_max);
501
502 /* Only truncating will shrink pid_max */
503 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 pid_list->pid_max = filtered_pids->pid_max;
505
506 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 if (!pid_list->pids) {
508 kfree(pid_list);
509 return -ENOMEM;
510 }
511
512 if (filtered_pids) {
513 /* copy the current bits to the new max */
514 for_each_set_bit(pid, filtered_pids->pids,
515 filtered_pids->pid_max) {
516 set_bit(pid, pid_list->pids);
517 nr_pids++;
518 }
519 }
520
521 while (cnt > 0) {
522
523 pos = 0;
524
525 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 if (ret < 0 || !trace_parser_loaded(&parser))
527 break;
528
529 read += ret;
530 ubuf += ret;
531 cnt -= ret;
532
533 parser.buffer[parser.idx] = 0;
534
535 ret = -EINVAL;
536 if (kstrtoul(parser.buffer, 0, &val))
537 break;
538 if (val >= pid_list->pid_max)
539 break;
540
541 pid = (pid_t)val;
542
543 set_bit(pid, pid_list->pids);
544 nr_pids++;
545
546 trace_parser_clear(&parser);
547 ret = 0;
548 }
549 trace_parser_put(&parser);
550
551 if (ret < 0) {
552 trace_free_pid_list(pid_list);
553 return ret;
554 }
555
556 if (!nr_pids) {
557 /* Cleared the list of pids */
558 trace_free_pid_list(pid_list);
559 read = ret;
560 pid_list = NULL;
561 }
562
563 *new_pid_list = pid_list;
564
565 return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 u64 ts;
571
572 /* Early boot up does not have a buffer yet */
573 if (!buf->buffer)
574 return trace_clock_local();
575
576 ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579 return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been enabled
589 *
590 * Shows if the global trace has been enabled or not. It uses the
591 * mirror flag "buffer_disabled" to be used in fast paths such as for
592 * the irqsoff tracer. But it may be inaccurate due to races. If you
593 * need to know the accurate state, use tracing_is_on() which is a little
594 * slower, but accurate.
595 */
596 int tracing_is_enabled(void)
597 {
598 /*
599 * For quick access (irqsoff uses this in fast path), just
600 * return the mirror variable of the state of the ring buffer.
601 * It's a little racy, but we don't really care.
602 */
603 smp_rmb();
604 return !global_trace.buffer_disabled;
605 }
606
607 /*
608 * trace_buf_size is the size in bytes that is allocated
609 * for a buffer. Note, the number of bytes is always rounded
610 * to page size.
611 *
612 * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, it is much appreciated not to
614  * have to wait for all that output. Anyway, this is configurable
615  * at boot time and at run time.
616 */
617 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer *trace_types __read_mostly;
623
624 /*
625 * trace_types_lock is used to protect the trace_types list.
626 */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630 * serialize the access of the ring buffer
631 *
632 * ring buffer serializes readers, but it is low level protection.
633  * The validity of the events (which are returned by ring_buffer_peek(), etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to consume
637  * these events concurrently:
638  * A) the page of the consumed events may become a normal page
639  * (not a reader page) in the ring buffer, and this page will be rewritten
640  * by the events producer.
641  * B) The page of the consumed events may become a page for splice_read,
642  * and this page will be returned to the system.
643  *
644  * These primitives allow multi process access to different cpu ring buffers
645  * concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649 */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657 if (cpu == RING_BUFFER_ALL_CPUS) {
658 /* gain it for accessing the whole ring buffer. */
659 down_write(&all_cpu_access_lock);
660 } else {
661 /* gain it for accessing a cpu ring buffer. */
662
663 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 down_read(&all_cpu_access_lock);
665
666 /* Secondly block other access to this @cpu ring buffer. */
667 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673 if (cpu == RING_BUFFER_ALL_CPUS) {
674 up_write(&all_cpu_access_lock);
675 } else {
676 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 up_read(&all_cpu_access_lock);
678 }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683 int cpu;
684
685 for_each_possible_cpu(cpu)
686 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695 (void)cpu;
696 mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701 (void)cpu;
702 mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
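
/*
 * Hedged usage sketch for the access primitives above (illustrative):
 * a reader takes the lock for the cpu it consumes from, or passes
 * RING_BUFFER_ALL_CPUS when it needs every buffer.
 *
 *	trace_access_lock(cpu);
 *	... consume events from the cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 */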
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 unsigned long flags,
714 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
717 unsigned long flags,
718 int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 unsigned long flags,
723 int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
738 {
739 struct trace_entry *ent = ring_buffer_event_data(event);
740
741 tracing_generic_entry_update(ent, flags, pc);
742 ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 int type,
748 unsigned long len,
749 unsigned long flags, int pc)
750 {
751 struct ring_buffer_event *event;
752
753 event = ring_buffer_lock_reserve(buffer, len);
754 if (event != NULL)
755 trace_event_setup(event, type, flags, pc);
756
757 return event;
758 }
759
760 static void tracer_tracing_on(struct trace_array *tr)
761 {
762 if (tr->trace_buffer.buffer)
763 ring_buffer_record_on(tr->trace_buffer.buffer);
764 /*
765 * This flag is looked at when buffers haven't been allocated
766 * yet, or by some tracers (like irqsoff), that just want to
767 * know if the ring buffer has been disabled, but it can handle
768 * races of where it gets disabled but we still do a record.
769 * As the check is in the fast path of the tracers, it is more
770 * important to be fast than accurate.
771 */
772 tr->buffer_disabled = 0;
773 /* Make the flag seen by readers */
774 smp_wmb();
775 }
776
777 /**
778 * tracing_on - enable tracing buffers
779 *
780 * This function enables tracing buffers that may have been
781 * disabled with tracing_off.
782 */
783 void tracing_on(void)
784 {
785 tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 __this_cpu_write(trace_cmdline_save, true);
794
795 /* If this is the temp buffer, we need to commit fully */
796 if (this_cpu_read(trace_buffered_event) == event) {
797 /* Length is in event->array[0] */
798 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 /* Release the temp buffer */
800 this_cpu_dec(trace_buffered_event_cnt);
801 } else
802 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806 * __trace_puts - write a constant string into the trace buffer.
807 * @ip: The address of the caller
808 * @str: The constant string to write
809 * @size: The size of the string.
810 */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 struct ring_buffer_event *event;
814 struct ring_buffer *buffer;
815 struct print_entry *entry;
816 unsigned long irq_flags;
817 int alloc;
818 int pc;
819
820 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 return 0;
822
823 pc = preempt_count();
824
825 if (unlikely(tracing_selftest_running || tracing_disabled))
826 return 0;
827
828 alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830 local_save_flags(irq_flags);
831 buffer = global_trace.trace_buffer.buffer;
832 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 irq_flags, pc);
834 if (!event)
835 return 0;
836
837 entry = ring_buffer_event_data(event);
838 entry->ip = ip;
839
840 memcpy(&entry->buf, str, size);
841
842 /* Add a newline if necessary */
843 if (entry->buf[size - 1] != '\n') {
844 entry->buf[size] = '\n';
845 entry->buf[size + 1] = '\0';
846 } else
847 entry->buf[size] = '\0';
848
849 __buffer_unlock_commit(buffer, event);
850 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852 return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
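
/*
 * Callers normally reach __trace_puts()/__trace_bputs() through the
 * trace_puts() macro (in include/linux/kernel.h) rather than calling
 * them directly; an illustrative sketch:
 *
 *	trace_puts("reached the slow path\n");
 *
 * which expands to __trace_bputs() for built-in constant strings, and
 * to __trace_puts() with _THIS_IP_ and strlen(str) otherwise.
 */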
855
856 /**
857 * __trace_bputs - write the pointer to a constant string into trace buffer
858 * @ip: The address of the caller
859 * @str: The constant string to write to the buffer to
860 */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 struct ring_buffer_event *event;
864 struct ring_buffer *buffer;
865 struct bputs_entry *entry;
866 unsigned long irq_flags;
867 int size = sizeof(struct bputs_entry);
868 int pc;
869
870 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 return 0;
872
873 pc = preempt_count();
874
875 if (unlikely(tracing_selftest_running || tracing_disabled))
876 return 0;
877
878 local_save_flags(irq_flags);
879 buffer = global_trace.trace_buffer.buffer;
880 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 irq_flags, pc);
882 if (!event)
883 return 0;
884
885 entry = ring_buffer_event_data(event);
886 entry->ip = ip;
887 entry->str = str;
888
889 __buffer_unlock_commit(buffer, event);
890 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892 return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 /**
898  * tracing_snapshot - take a snapshot of the current buffer.
899 *
900 * This causes a swap between the snapshot buffer and the current live
901 * tracing buffer. You can use this to take snapshots of the live
902 * trace when some condition is triggered, but continue to trace.
903 *
904  * Note, make sure to allocate the snapshot either with
905  * tracing_snapshot_alloc(), or by doing it manually
906  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
907  *
908  * If the snapshot buffer is not allocated, this will stop tracing,
909  * basically making a permanent snapshot.
910 */
911 void tracing_snapshot(void)
912 {
913 struct trace_array *tr = &global_trace;
914 struct tracer *tracer = tr->current_trace;
915 unsigned long flags;
916
917 if (in_nmi()) {
918 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
919 internal_trace_puts("*** snapshot is being ignored ***\n");
920 return;
921 }
922
923 if (!tr->allocated_snapshot) {
924 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
925 internal_trace_puts("*** stopping trace here! ***\n");
926 tracing_off();
927 return;
928 }
929
930 	/* Note, the snapshot cannot be used when the tracer uses it */
931 if (tracer->use_max_tr) {
932 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
933 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
934 return;
935 }
936
937 local_irq_save(flags);
938 update_max_tr(tr, current, smp_processor_id());
939 local_irq_restore(flags);
940 }
941 EXPORT_SYMBOL_GPL(tracing_snapshot);
942
943 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
944 struct trace_buffer *size_buf, int cpu_id);
945 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
946
947 static int alloc_snapshot(struct trace_array *tr)
948 {
949 int ret;
950
951 if (!tr->allocated_snapshot) {
952
953 /* allocate spare buffer */
954 ret = resize_buffer_duplicate_size(&tr->max_buffer,
955 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
956 if (ret < 0)
957 return ret;
958
959 tr->allocated_snapshot = true;
960 }
961
962 return 0;
963 }
964
965 static void free_snapshot(struct trace_array *tr)
966 {
967 /*
968 	 * We don't free the ring buffer; instead, we resize it because
969 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
970 	 * we want to preserve it.
971 */
972 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
973 set_buffer_entries(&tr->max_buffer, 1);
974 tracing_reset_online_cpus(&tr->max_buffer);
975 tr->allocated_snapshot = false;
976 }
977
978 /**
979 * tracing_alloc_snapshot - allocate snapshot buffer.
980 *
981 * This only allocates the snapshot buffer if it isn't already
982 * allocated - it doesn't also take a snapshot.
983 *
984 * This is meant to be used in cases where the snapshot buffer needs
985 * to be set up for events that can't sleep but need to be able to
986 * trigger a snapshot.
987 */
988 int tracing_alloc_snapshot(void)
989 {
990 struct trace_array *tr = &global_trace;
991 int ret;
992
993 ret = alloc_snapshot(tr);
994 WARN_ON(ret < 0);
995
996 return ret;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999
1000 /**
1001  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1002  *
1003  * This is similar to tracing_snapshot(), but it will allocate the
1004 * snapshot buffer if it isn't already allocated. Use this only
1005 * where it is safe to sleep, as the allocation may sleep.
1006 *
1007 * This causes a swap between the snapshot buffer and the current live
1008 * tracing buffer. You can use this to take snapshots of the live
1009 * trace when some condition is triggered, but continue to trace.
1010 */
1011 void tracing_snapshot_alloc(void)
1012 {
1013 int ret;
1014
1015 ret = tracing_alloc_snapshot();
1016 if (ret < 0)
1017 return;
1018
1019 tracing_snapshot();
1020 }
1021 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
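
/*
 * Illustrative pairing of the snapshot API above (a sketch, not part
 * of this file): allocate once from sleepable context, then trigger
 * from any context where a snapshot is valid (not NMI, and not while
 * the current tracer itself uses the max buffer):
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (condition)
 *		tracing_snapshot();
 */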
1022 #else
1023 void tracing_snapshot(void)
1024 {
1025 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot);
1028 int tracing_alloc_snapshot(void)
1029 {
1030 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1031 return -ENODEV;
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1034 void tracing_snapshot_alloc(void)
1035 {
1036 /* Give warning */
1037 tracing_snapshot();
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1040 #endif /* CONFIG_TRACER_SNAPSHOT */
1041
1042 static void tracer_tracing_off(struct trace_array *tr)
1043 {
1044 if (tr->trace_buffer.buffer)
1045 ring_buffer_record_off(tr->trace_buffer.buffer);
1046 /*
1047 * This flag is looked at when buffers haven't been allocated
1048 * yet, or by some tracers (like irqsoff), that just want to
1049 * know if the ring buffer has been disabled, but it can handle
1050 * races of where it gets disabled but we still do a record.
1051 * As the check is in the fast path of the tracers, it is more
1052 * important to be fast than accurate.
1053 */
1054 tr->buffer_disabled = 1;
1055 /* Make the flag seen by readers */
1056 smp_wmb();
1057 }
1058
1059 /**
1060 * tracing_off - turn off tracing buffers
1061 *
1062 * This function stops the tracing buffers from recording data.
1063 * It does not disable any overhead the tracers themselves may
1064 * be causing. This function simply causes all recording to
1065 * the ring buffers to fail.
1066 */
1067 void tracing_off(void)
1068 {
1069 tracer_tracing_off(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_off);
1072
1073 void disable_trace_on_warning(void)
1074 {
1075 if (__disable_trace_on_warning)
1076 tracing_off();
1077 }
1078
1079 /**
1080  * tracer_tracing_is_on - show the real state of the ring buffer
1081  * @tr: the trace array whose ring buffer state is queried
1082  *
1083  * Shows whether the ring buffer of @tr is actually enabled.
1084 */
1085 int tracer_tracing_is_on(struct trace_array *tr)
1086 {
1087 if (tr->trace_buffer.buffer)
1088 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1089 return !tr->buffer_disabled;
1090 }
1091
1092 /**
1093 * tracing_is_on - show state of ring buffers enabled
1094 */
1095 int tracing_is_on(void)
1096 {
1097 return tracer_tracing_is_on(&global_trace);
1098 }
1099 EXPORT_SYMBOL_GPL(tracing_is_on);
1100
1101 static int __init set_buf_size(char *str)
1102 {
1103 unsigned long buf_size;
1104
1105 if (!str)
1106 return 0;
1107 buf_size = memparse(str, &str);
1108 /* nr_entries can not be zero */
1109 if (buf_size == 0)
1110 return 0;
1111 trace_buf_size = buf_size;
1112 return 1;
1113 }
1114 __setup("trace_buf_size=", set_buf_size);
1115
1116 static int __init set_tracing_thresh(char *str)
1117 {
1118 unsigned long threshold;
1119 int ret;
1120
1121 if (!str)
1122 return 0;
1123 ret = kstrtoul(str, 0, &threshold);
1124 if (ret < 0)
1125 return 0;
1126 tracing_thresh = threshold * 1000;
1127 return 1;
1128 }
1129 __setup("tracing_thresh=", set_tracing_thresh);
1130
1131 unsigned long nsecs_to_usecs(unsigned long nsecs)
1132 {
1133 return nsecs / 1000;
1134 }
1135
1136 /*
1137 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1138 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1139 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1140 * of strings in the order that the enums were defined.
1141 */
1142 #undef C
1143 #define C(a, b) b
1144
1145 /* These must match the bit positions in trace_iterator_flags */
1146 static const char *trace_options[] = {
1147 TRACE_FLAGS
1148 NULL
1149 };
1150
1151 static struct {
1152 u64 (*func)(void);
1153 const char *name;
1154 int in_ns; /* is this clock in nanoseconds? */
1155 } trace_clocks[] = {
1156 { trace_clock_local, "local", 1 },
1157 { trace_clock_global, "global", 1 },
1158 { trace_clock_counter, "counter", 0 },
1159 { trace_clock_jiffies, "uptime", 0 },
1160 { trace_clock, "perf", 1 },
1161 { ktime_get_mono_fast_ns, "mono", 1 },
1162 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1163 { ktime_get_boot_fast_ns, "boot", 1 },
1164 ARCH_TRACE_CLOCKS
1165 };
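
/*
 * The names above are what user space selects through the trace_clock
 * file, e.g. (illustrative):
 *
 *	echo mono > /sys/kernel/debug/tracing/trace_clock
 */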
1166
1167 /*
1168  * trace_parser_get_init - allocates the buffer for the trace parser
1169 */
1170 int trace_parser_get_init(struct trace_parser *parser, int size)
1171 {
1172 memset(parser, 0, sizeof(*parser));
1173
1174 parser->buffer = kmalloc(size, GFP_KERNEL);
1175 if (!parser->buffer)
1176 return 1;
1177
1178 parser->size = size;
1179 return 0;
1180 }
1181
1182 /*
1183  * trace_parser_put - frees the buffer for the trace parser
1184 */
1185 void trace_parser_put(struct trace_parser *parser)
1186 {
1187 kfree(parser->buffer);
1188 parser->buffer = NULL;
1189 }
1190
1191 /*
1192 * trace_get_user - reads the user input string separated by space
1193 * (matched by isspace(ch))
1194 *
1195 * For each string found the 'struct trace_parser' is updated,
1196 * and the function returns.
1197 *
1198 * Returns number of bytes read.
1199 *
1200 * See kernel/trace/trace.h for 'struct trace_parser' details.
1201 */
1202 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1203 size_t cnt, loff_t *ppos)
1204 {
1205 char ch;
1206 size_t read = 0;
1207 ssize_t ret;
1208
1209 if (!*ppos)
1210 trace_parser_clear(parser);
1211
1212 ret = get_user(ch, ubuf++);
1213 if (ret)
1214 goto out;
1215
1216 read++;
1217 cnt--;
1218
1219 /*
1220 * The parser is not finished with the last write,
1221 * continue reading the user input without skipping spaces.
1222 */
1223 if (!parser->cont) {
1224 /* skip white space */
1225 while (cnt && isspace(ch)) {
1226 ret = get_user(ch, ubuf++);
1227 if (ret)
1228 goto out;
1229 read++;
1230 cnt--;
1231 }
1232
1233 /* only spaces were written */
1234 if (isspace(ch)) {
1235 *ppos += read;
1236 ret = read;
1237 goto out;
1238 }
1239
1240 parser->idx = 0;
1241 }
1242
1243 /* read the non-space input */
1244 while (cnt && !isspace(ch)) {
1245 if (parser->idx < parser->size - 1)
1246 parser->buffer[parser->idx++] = ch;
1247 else {
1248 ret = -EINVAL;
1249 goto out;
1250 }
1251 ret = get_user(ch, ubuf++);
1252 if (ret)
1253 goto out;
1254 read++;
1255 cnt--;
1256 }
1257
1258 /* We either got finished input or we have to wait for another call. */
1259 if (isspace(ch)) {
1260 parser->buffer[parser->idx] = 0;
1261 parser->cont = false;
1262 } else if (parser->idx < parser->size - 1) {
1263 parser->cont = true;
1264 parser->buffer[parser->idx++] = ch;
1265 } else {
1266 ret = -EINVAL;
1267 goto out;
1268 }
1269
1270 *ppos += read;
1271 ret = read;
1272
1273 out:
1274 return ret;
1275 }
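
/*
 * Hedged sketch of a typical caller loop (illustrative; compare
 * trace_pid_write() above): pull one whitespace-separated token per
 * iteration until the user buffer is drained.
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... consume parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 */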
1276
1277 /* TODO add a seq_buf_to_buffer() */
1278 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1279 {
1280 int len;
1281
1282 if (trace_seq_used(s) <= s->seq.readpos)
1283 return -EBUSY;
1284
1285 len = trace_seq_used(s) - s->seq.readpos;
1286 if (cnt > len)
1287 cnt = len;
1288 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1289
1290 s->seq.readpos += cnt;
1291 return cnt;
1292 }
1293
1294 unsigned long __read_mostly tracing_thresh;
1295
1296 #ifdef CONFIG_TRACER_MAX_TRACE
1297 /*
1298 * Copy the new maximum trace into the separate maximum-trace
1299 * structure. (this way the maximum trace is permanently saved,
1300 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1301 */
1302 static void
1303 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1304 {
1305 struct trace_buffer *trace_buf = &tr->trace_buffer;
1306 struct trace_buffer *max_buf = &tr->max_buffer;
1307 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1308 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1309
1310 max_buf->cpu = cpu;
1311 max_buf->time_start = data->preempt_timestamp;
1312
1313 max_data->saved_latency = tr->max_latency;
1314 max_data->critical_start = data->critical_start;
1315 max_data->critical_end = data->critical_end;
1316
1317 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1318 max_data->pid = tsk->pid;
1319 /*
1320 * If tsk == current, then use current_uid(), as that does not use
1321 * RCU. The irq tracer can be called out of RCU scope.
1322 */
1323 if (tsk == current)
1324 max_data->uid = current_uid();
1325 else
1326 max_data->uid = task_uid(tsk);
1327
1328 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1329 max_data->policy = tsk->policy;
1330 max_data->rt_priority = tsk->rt_priority;
1331
1332 /* record this tasks comm */
1333 tracing_record_cmdline(tsk);
1334 }
1335
1336 /**
1337 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1338  * @tr: the trace array
1339 * @tsk: the task with the latency
1340 * @cpu: The cpu that initiated the trace.
1341 *
1342 * Flip the buffers between the @tr and the max_tr and record information
1343 * about which task was the cause of this latency.
1344 */
1345 void
1346 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1347 {
1348 struct ring_buffer *buf;
1349
1350 if (tr->stop_count)
1351 return;
1352
1353 WARN_ON_ONCE(!irqs_disabled());
1354
1355 if (!tr->allocated_snapshot) {
1356 /* Only the nop tracer should hit this when disabling */
1357 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358 return;
1359 }
1360
1361 arch_spin_lock(&tr->max_lock);
1362
1363 buf = tr->trace_buffer.buffer;
1364 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1365 tr->max_buffer.buffer = buf;
1366
1367 __update_max_tr(tr, tsk, cpu);
1368 arch_spin_unlock(&tr->max_lock);
1369 }
1370
1371 /**
1372 * update_max_tr_single - only copy one trace over, and reset the rest
1373  * @tr: the trace array
1374  * @tsk: the task with the latency
1375  * @cpu: the cpu of the buffer to copy
1376 *
1377 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1378 */
1379 void
1380 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1381 {
1382 int ret;
1383
1384 if (tr->stop_count)
1385 return;
1386
1387 WARN_ON_ONCE(!irqs_disabled());
1388 if (!tr->allocated_snapshot) {
1389 /* Only the nop tracer should hit this when disabling */
1390 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1391 return;
1392 }
1393
1394 arch_spin_lock(&tr->max_lock);
1395
1396 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1397
1398 if (ret == -EBUSY) {
1399 /*
1400 * We failed to swap the buffer due to a commit taking
1401 * place on this CPU. We fail to record, but we reset
1402 * the max trace buffer (no one writes directly to it)
1403 * and flag that it failed.
1404 */
1405 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1406 "Failed to swap buffers due to commit in progress\n");
1407 }
1408
1409 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1410
1411 __update_max_tr(tr, tsk, cpu);
1412 arch_spin_unlock(&tr->max_lock);
1413 }
1414 #endif /* CONFIG_TRACER_MAX_TRACE */
1415
1416 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1417 {
1418 /* Iterators are static, they should be filled or empty */
1419 if (trace_buffer_iter(iter, iter->cpu_file))
1420 return 0;
1421
1422 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1423 full);
1424 }
1425
1426 #ifdef CONFIG_FTRACE_STARTUP_TEST
1427 static int run_tracer_selftest(struct tracer *type)
1428 {
1429 struct trace_array *tr = &global_trace;
1430 struct tracer *saved_tracer = tr->current_trace;
1431 int ret;
1432
1433 if (!type->selftest || tracing_selftest_disabled)
1434 return 0;
1435
1436 /*
1437 * Run a selftest on this tracer.
1438 * Here we reset the trace buffer, and set the current
1439 * tracer to be this tracer. The tracer can then run some
1440 * internal tracing to verify that everything is in order.
1441 * If we fail, we do not register this tracer.
1442 */
1443 tracing_reset_online_cpus(&tr->trace_buffer);
1444
1445 tr->current_trace = type;
1446
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448 if (type->use_max_tr) {
1449 /* If we expanded the buffers, make sure the max is expanded too */
1450 if (ring_buffer_expanded)
1451 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1452 RING_BUFFER_ALL_CPUS);
1453 tr->allocated_snapshot = true;
1454 }
1455 #endif
1456
1457 /* the test is responsible for initializing and enabling */
1458 pr_info("Testing tracer %s: ", type->name);
1459 ret = type->selftest(type, tr);
1460 /* the test is responsible for resetting too */
1461 tr->current_trace = saved_tracer;
1462 if (ret) {
1463 printk(KERN_CONT "FAILED!\n");
1464 /* Add the warning after printing 'FAILED' */
1465 WARN_ON(1);
1466 return -1;
1467 }
1468 /* Only reset on passing, to avoid touching corrupted buffers */
1469 tracing_reset_online_cpus(&tr->trace_buffer);
1470
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472 if (type->use_max_tr) {
1473 tr->allocated_snapshot = false;
1474
1475 /* Shrink the max buffer again */
1476 if (ring_buffer_expanded)
1477 ring_buffer_resize(tr->max_buffer.buffer, 1,
1478 RING_BUFFER_ALL_CPUS);
1479 }
1480 #endif
1481
1482 printk(KERN_CONT "PASSED\n");
1483 return 0;
1484 }
1485 #else
1486 static inline int run_tracer_selftest(struct tracer *type)
1487 {
1488 return 0;
1489 }
1490 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1491
1492 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1493
1494 static void __init apply_trace_boot_options(void);
1495
1496 /**
1497 * register_tracer - register a tracer with the ftrace system.
1498  * @type: the plugin for the tracer
1499 *
1500 * Register a new plugin tracer.
1501 */
1502 int __init register_tracer(struct tracer *type)
1503 {
1504 struct tracer *t;
1505 int ret = 0;
1506
1507 if (!type->name) {
1508 pr_info("Tracer must have a name\n");
1509 return -1;
1510 }
1511
1512 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1513 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1514 return -1;
1515 }
1516
1517 mutex_lock(&trace_types_lock);
1518
1519 tracing_selftest_running = true;
1520
1521 for (t = trace_types; t; t = t->next) {
1522 if (strcmp(type->name, t->name) == 0) {
1523 /* already found */
1524 pr_info("Tracer %s already registered\n",
1525 type->name);
1526 ret = -1;
1527 goto out;
1528 }
1529 }
1530
1531 if (!type->set_flag)
1532 type->set_flag = &dummy_set_flag;
1533 if (!type->flags) {
1534 		/* allocate a dummy tracer_flags */
1535 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1536 if (!type->flags) {
1537 ret = -ENOMEM;
1538 goto out;
1539 }
1540 type->flags->val = 0;
1541 type->flags->opts = dummy_tracer_opt;
1542 } else
1543 if (!type->flags->opts)
1544 type->flags->opts = dummy_tracer_opt;
1545
1546 /* store the tracer for __set_tracer_option */
1547 type->flags->trace = type;
1548
1549 ret = run_tracer_selftest(type);
1550 if (ret < 0)
1551 goto out;
1552
1553 type->next = trace_types;
1554 trace_types = type;
1555 add_tracer_options(&global_trace, type);
1556
1557 out:
1558 tracing_selftest_running = false;
1559 mutex_unlock(&trace_types_lock);
1560
1561 if (ret || !default_bootup_tracer)
1562 goto out_unlock;
1563
1564 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1565 goto out_unlock;
1566
1567 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1568 /* Do we want this tracer to start on bootup? */
1569 tracing_set_tracer(&global_trace, type->name);
1570 default_bootup_tracer = NULL;
1571
1572 apply_trace_boot_options();
1573
1574 	/* Disable other selftests, since this tracer will break them. */
1575 tracing_selftest_disabled = true;
1576 #ifdef CONFIG_FTRACE_STARTUP_TEST
1577 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1578 type->name);
1579 #endif
1580
1581 out_unlock:
1582 return ret;
1583 }
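
/*
 * A minimal registration sketch (illustrative; the "example" names are
 * hypothetical). register_tracer() is __init, so tracers register from
 * initcalls:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */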
1584
1585 void tracing_reset(struct trace_buffer *buf, int cpu)
1586 {
1587 struct ring_buffer *buffer = buf->buffer;
1588
1589 if (!buffer)
1590 return;
1591
1592 ring_buffer_record_disable(buffer);
1593
1594 /* Make sure all commits have finished */
1595 synchronize_sched();
1596 ring_buffer_reset_cpu(buffer, cpu);
1597
1598 ring_buffer_record_enable(buffer);
1599 }
1600
1601 void tracing_reset_online_cpus(struct trace_buffer *buf)
1602 {
1603 struct ring_buffer *buffer = buf->buffer;
1604 int cpu;
1605
1606 if (!buffer)
1607 return;
1608
1609 ring_buffer_record_disable(buffer);
1610
1611 /* Make sure all commits have finished */
1612 synchronize_sched();
1613
1614 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1615
1616 for_each_online_cpu(cpu)
1617 ring_buffer_reset_cpu(buffer, cpu);
1618
1619 ring_buffer_record_enable(buffer);
1620 }
1621
1622 /* Must have trace_types_lock held */
1623 void tracing_reset_all_online_cpus(void)
1624 {
1625 struct trace_array *tr;
1626
1627 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1628 tracing_reset_online_cpus(&tr->trace_buffer);
1629 #ifdef CONFIG_TRACER_MAX_TRACE
1630 tracing_reset_online_cpus(&tr->max_buffer);
1631 #endif
1632 }
1633 }
1634
1635 #define SAVED_CMDLINES_DEFAULT 128
1636 #define NO_CMDLINE_MAP UINT_MAX
1637 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1638 struct saved_cmdlines_buffer {
1639 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1640 unsigned *map_cmdline_to_pid;
1641 unsigned cmdline_num;
1642 int cmdline_idx;
1643 char *saved_cmdlines;
1644 };
1645 static struct saved_cmdlines_buffer *savedcmd;
1646
1647 /* temporarily disable recording */
1648 static atomic_t trace_record_cmdline_disabled __read_mostly;
1649
1650 static inline char *get_saved_cmdlines(int idx)
1651 {
1652 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1653 }
1654
1655 static inline void set_cmdline(int idx, const char *cmdline)
1656 {
1657 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1658 }
1659
1660 static int allocate_cmdlines_buffer(unsigned int val,
1661 struct saved_cmdlines_buffer *s)
1662 {
1663 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1664 GFP_KERNEL);
1665 if (!s->map_cmdline_to_pid)
1666 return -ENOMEM;
1667
1668 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1669 if (!s->saved_cmdlines) {
1670 kfree(s->map_cmdline_to_pid);
1671 return -ENOMEM;
1672 }
1673
1674 s->cmdline_idx = 0;
1675 s->cmdline_num = val;
1676 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1677 sizeof(s->map_pid_to_cmdline));
1678 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1679 val * sizeof(*s->map_cmdline_to_pid));
1680
1681 return 0;
1682 }
1683
1684 static int trace_create_savedcmd(void)
1685 {
1686 int ret;
1687
1688 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1689 if (!savedcmd)
1690 return -ENOMEM;
1691
1692 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1693 if (ret < 0) {
1694 kfree(savedcmd);
1695 savedcmd = NULL;
1696 return -ENOMEM;
1697 }
1698
1699 return 0;
1700 }
1701
1702 int is_tracing_stopped(void)
1703 {
1704 return global_trace.stop_count;
1705 }
1706
1707 /**
1708 * tracing_start - quick start of the tracer
1709 *
1710 * If tracing is enabled but was stopped by tracing_stop,
1711 * this will start the tracer back up.
1712 */
1713 void tracing_start(void)
1714 {
1715 struct ring_buffer *buffer;
1716 unsigned long flags;
1717
1718 if (tracing_disabled)
1719 return;
1720
1721 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1722 if (--global_trace.stop_count) {
1723 if (global_trace.stop_count < 0) {
1724 /* Someone screwed up their debugging */
1725 WARN_ON_ONCE(1);
1726 global_trace.stop_count = 0;
1727 }
1728 goto out;
1729 }
1730
1731 /* Prevent the buffers from switching */
1732 arch_spin_lock(&global_trace.max_lock);
1733
1734 buffer = global_trace.trace_buffer.buffer;
1735 if (buffer)
1736 ring_buffer_record_enable(buffer);
1737
1738 #ifdef CONFIG_TRACER_MAX_TRACE
1739 buffer = global_trace.max_buffer.buffer;
1740 if (buffer)
1741 ring_buffer_record_enable(buffer);
1742 #endif
1743
1744 arch_spin_unlock(&global_trace.max_lock);
1745
1746 out:
1747 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1748 }
1749
1750 static void tracing_start_tr(struct trace_array *tr)
1751 {
1752 struct ring_buffer *buffer;
1753 unsigned long flags;
1754
1755 if (tracing_disabled)
1756 return;
1757
1758 /* If global, we need to also start the max tracer */
1759 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1760 return tracing_start();
1761
1762 raw_spin_lock_irqsave(&tr->start_lock, flags);
1763
1764 if (--tr->stop_count) {
1765 if (tr->stop_count < 0) {
1766 /* Someone screwed up their debugging */
1767 WARN_ON_ONCE(1);
1768 tr->stop_count = 0;
1769 }
1770 goto out;
1771 }
1772
1773 buffer = tr->trace_buffer.buffer;
1774 if (buffer)
1775 ring_buffer_record_enable(buffer);
1776
1777 out:
1778 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1779 }
1780
1781 /**
1782 * tracing_stop - quick stop of the tracer
1783 *
1784 * Light weight way to stop tracing. Use in conjunction with
1785 * tracing_start.
1786 */
1787 void tracing_stop(void)
1788 {
1789 struct ring_buffer *buffer;
1790 unsigned long flags;
1791
1792 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1793 if (global_trace.stop_count++)
1794 goto out;
1795
1796 /* Prevent the buffers from switching */
1797 arch_spin_lock(&global_trace.max_lock);
1798
1799 buffer = global_trace.trace_buffer.buffer;
1800 if (buffer)
1801 ring_buffer_record_disable(buffer);
1802
1803 #ifdef CONFIG_TRACER_MAX_TRACE
1804 buffer = global_trace.max_buffer.buffer;
1805 if (buffer)
1806 ring_buffer_record_disable(buffer);
1807 #endif
1808
1809 arch_spin_unlock(&global_trace.max_lock);
1810
1811 out:
1812 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1813 }
1814
1815 static void tracing_stop_tr(struct trace_array *tr)
1816 {
1817 struct ring_buffer *buffer;
1818 unsigned long flags;
1819
1820 /* If global, we need to also stop the max tracer */
1821 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1822 return tracing_stop();
1823
1824 raw_spin_lock_irqsave(&tr->start_lock, flags);
1825 if (tr->stop_count++)
1826 goto out;
1827
1828 buffer = tr->trace_buffer.buffer;
1829 if (buffer)
1830 ring_buffer_record_disable(buffer);
1831
1832 out:
1833 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1834 }
1835
1836 void trace_stop_cmdline_recording(void);
1837
1838 static int trace_save_cmdline(struct task_struct *tsk)
1839 {
1840 unsigned pid, idx;
1841
1842 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1843 return 0;
1844
1845 /*
1846 * It's not the end of the world if we don't get
1847 * the lock, but we also don't want to spin
1848 * nor do we want to disable interrupts,
1849 * so if we miss here, then better luck next time.
1850 */
1851 if (!arch_spin_trylock(&trace_cmdline_lock))
1852 return 0;
1853
1854 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1855 if (idx == NO_CMDLINE_MAP) {
1856 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1857
1858 /*
1859 * Check whether the cmdline buffer at idx has a pid
1860 * mapped. We are going to overwrite that entry so we
1861 * need to clear the map_pid_to_cmdline. Otherwise we
1862 * would read the new comm for the old pid.
1863 */
1864 pid = savedcmd->map_cmdline_to_pid[idx];
1865 if (pid != NO_CMDLINE_MAP)
1866 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1867
1868 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1869 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1870
1871 savedcmd->cmdline_idx = idx;
1872 }
1873
1874 set_cmdline(idx, tsk->comm);
1875
1876 arch_spin_unlock(&trace_cmdline_lock);
1877
1878 return 1;
1879 }
1880
1881 static void __trace_find_cmdline(int pid, char comm[])
1882 {
1883 unsigned map;
1884
1885 if (!pid) {
1886 strcpy(comm, "<idle>");
1887 return;
1888 }
1889
1890 if (WARN_ON_ONCE(pid < 0)) {
1891 strcpy(comm, "<XXX>");
1892 return;
1893 }
1894
1895 if (pid > PID_MAX_DEFAULT) {
1896 strcpy(comm, "<...>");
1897 return;
1898 }
1899
1900 map = savedcmd->map_pid_to_cmdline[pid];
1901 if (map != NO_CMDLINE_MAP)
1902 strcpy(comm, get_saved_cmdlines(map));
1903 else
1904 strcpy(comm, "<...>");
1905 }
1906
1907 void trace_find_cmdline(int pid, char comm[])
1908 {
1909 preempt_disable();
1910 arch_spin_lock(&trace_cmdline_lock);
1911
1912 __trace_find_cmdline(pid, comm);
1913
1914 arch_spin_unlock(&trace_cmdline_lock);
1915 preempt_enable();
1916 }
1917
1918 void tracing_record_cmdline(struct task_struct *tsk)
1919 {
1920 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1921 return;
1922
1923 if (!__this_cpu_read(trace_cmdline_save))
1924 return;
1925
1926 if (trace_save_cmdline(tsk))
1927 __this_cpu_write(trace_cmdline_save, false);
1928 }
1929
1930 void
1931 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1932 int pc)
1933 {
1934 struct task_struct *tsk = current;
1935
1936 entry->preempt_count = pc & 0xff;
1937 entry->pid = (tsk) ? tsk->pid : 0;
1938 entry->flags =
1939 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1940 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1941 #else
1942 TRACE_FLAG_IRQS_NOSUPPORT |
1943 #endif
1944 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
1945 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1946 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1947 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1948 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1949 }
1950 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1951
1952 struct ring_buffer_event *
1953 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1954 int type,
1955 unsigned long len,
1956 unsigned long flags, int pc)
1957 {
1958 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
1959 }
1960
1961 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1962 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1963 static int trace_buffered_event_ref;
1964
1965 /**
1966 * trace_buffered_event_enable - enable buffering events
1967 *
1968 * When events are being filtered, it is quicker to use a temporary
1969 * buffer to write the event data into if there's a likely chance
1970 * that it will not be committed. The discard of the ring buffer
1971 * is not as fast as committing, and is much slower than copying
1972 * a commit.
1973 *
1974 * When an event is to be filtered, allocate per cpu buffers to
1975 * write the event data into, and if the event is filtered and discarded
1976 * it is simply dropped, otherwise, the entire data is to be committed
1977 * in one shot.
1978 */
1979 void trace_buffered_event_enable(void)
1980 {
1981 struct ring_buffer_event *event;
1982 struct page *page;
1983 int cpu;
1984
1985 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1986
1987 if (trace_buffered_event_ref++)
1988 return;
1989
1990 for_each_tracing_cpu(cpu) {
1991 page = alloc_pages_node(cpu_to_node(cpu),
1992 GFP_KERNEL | __GFP_NORETRY, 0);
1993 if (!page)
1994 goto failed;
1995
1996 event = page_address(page);
1997 memset(event, 0, sizeof(*event));
1998
1999 per_cpu(trace_buffered_event, cpu) = event;
2000
2001 preempt_disable();
2002 if (cpu == smp_processor_id() &&
2003 this_cpu_read(trace_buffered_event) !=
2004 per_cpu(trace_buffered_event, cpu))
2005 WARN_ON_ONCE(1);
2006 preempt_enable();
2007 }
2008
2009 return;
2010 failed:
2011 trace_buffered_event_disable();
2012 }
2013
2014 static void enable_trace_buffered_event(void *data)
2015 {
2016 /* Probably not needed, but do it anyway; pairs with the smp_wmb() in trace_buffered_event_disable() */
2017 smp_rmb();
2018 this_cpu_dec(trace_buffered_event_cnt);
2019 }
2020
2021 static void disable_trace_buffered_event(void *data)
2022 {
2023 this_cpu_inc(trace_buffered_event_cnt);
2024 }
2025
2026 /**
2027 * trace_buffered_event_disable - disable buffering events
2028 *
2029 * When a filter is removed, it is faster to not use the buffered
2030 * events, and to commit directly into the ring buffer. Free up
2031 * the temp buffers when there are no more users. This requires
2032 * special synchronization with current events.
2033 */
2034 void trace_buffered_event_disable(void)
2035 {
2036 int cpu;
2037
2038 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2039
2040 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2041 return;
2042
2043 if (--trace_buffered_event_ref)
2044 return;
2045
2046 preempt_disable();
2047 /* For each CPU, set the buffer as used. */
2048 smp_call_function_many(tracing_buffer_mask,
2049 disable_trace_buffered_event, NULL, 1);
2050 preempt_enable();
2051
2052 /* Wait for all current users to finish */
2053 synchronize_sched();
2054
2055 for_each_tracing_cpu(cpu) {
2056 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2057 per_cpu(trace_buffered_event, cpu) = NULL;
2058 }
2059 /*
2060 * Make sure trace_buffered_event is NULL before clearing
2061 * trace_buffered_event_cnt.
2062 */
2063 smp_wmb();
2064
2065 preempt_disable();
2066 /* Do the work on each cpu */
2067 smp_call_function_many(tracing_buffer_mask,
2068 enable_trace_buffered_event, NULL, 1);
2069 preempt_enable();
2070 }
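
/*
 * Expected pairing (editor's sketch; example_filter_lifetime() is
 * hypothetical): enable/disable calls nest under event_mutex, and
 * only the final disable frees the per-cpu pages.
 */
static __maybe_unused void example_filter_lifetime(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();  /* ref 0 -> 1: allocates pages */
        trace_buffered_event_enable();  /* ref 1 -> 2: no-op */
        trace_buffered_event_disable(); /* ref 2 -> 1: pages kept */
        trace_buffered_event_disable(); /* ref 1 -> 0: pages freed */
        mutex_unlock(&event_mutex);
}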
2071
2072 static struct ring_buffer *temp_buffer;
2073
2074 struct ring_buffer_event *
2075 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2076 struct trace_event_file *trace_file,
2077 int type, unsigned long len,
2078 unsigned long flags, int pc)
2079 {
2080 struct ring_buffer_event *entry;
2081 int val;
2082
2083 *current_rb = trace_file->tr->trace_buffer.buffer;
2084
2085 if ((trace_file->flags &
2086 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2087 (entry = this_cpu_read(trace_buffered_event))) {
2088 /* Try to use the per cpu buffer first */
2089 val = this_cpu_inc_return(trace_buffered_event_cnt);
2090 if (val == 1) {
2091 trace_event_setup(entry, type, flags, pc);
2092 entry->array[0] = len;
2093 return entry;
2094 }
2095 this_cpu_dec(trace_buffered_event_cnt);
2096 }
2097
2098 entry = __trace_buffer_lock_reserve(*current_rb,
2099 type, len, flags, pc);
2100 /*
2101 * If tracing is off but we have triggers enabled,
2102 * we still need to look at the event data. Use the temp_buffer
2103 * to store the trace event for the trigger to use. It's recursion
2104 * safe and will not be recorded anywhere.
2105 */
2106 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2107 *current_rb = temp_buffer;
2108 entry = __trace_buffer_lock_reserve(*current_rb,
2109 type, len, flags, pc);
2110 }
2111 return entry;
2112 }
2113 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
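
/*
 * Editor's note: trace_buffered_event_cnt doubles as a nesting guard.
 * Only the outermost event on a CPU (inc_return == 1) may use the
 * per-cpu page; a nested event decrements the count again and falls
 * back to the normal __trace_buffer_lock_reserve() path.
 */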
2114
2115 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2116 static DEFINE_MUTEX(tracepoint_printk_mutex);
2117
2118 static void output_printk(struct trace_event_buffer *fbuffer)
2119 {
2120 struct trace_event_call *event_call;
2121 struct trace_event *event;
2122 unsigned long flags;
2123 struct trace_iterator *iter = tracepoint_print_iter;
2124
2125 /* We should never get here if iter is NULL */
2126 if (WARN_ON_ONCE(!iter))
2127 return;
2128
2129 event_call = fbuffer->trace_file->event_call;
2130 if (!event_call || !event_call->event.funcs ||
2131 !event_call->event.funcs->trace)
2132 return;
2133
2134 event = &fbuffer->trace_file->event_call->event;
2135
2136 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2137 trace_seq_init(&iter->seq);
2138 iter->ent = fbuffer->entry;
2139 event_call->event.funcs->trace(iter, 0, event);
2140 trace_seq_putc(&iter->seq, 0);
2141 printk("%s", iter->seq.buffer);
2142
2143 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2144 }
2145
2146 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2147 void __user *buffer, size_t *lenp,
2148 loff_t *ppos)
2149 {
2150 int save_tracepoint_printk;
2151 int ret;
2152
2153 mutex_lock(&tracepoint_printk_mutex);
2154 save_tracepoint_printk = tracepoint_printk;
2155
2156 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2157
2158 /*
2159 * This will force an early exit, as tracepoint_printk
2160 * is always zero when tracepoint_print_iter is not allocated
2161 */
2162 if (!tracepoint_print_iter)
2163 tracepoint_printk = 0;
2164
2165 if (save_tracepoint_printk == tracepoint_printk)
2166 goto out;
2167
2168 if (tracepoint_printk)
2169 static_key_enable(&tracepoint_printk_key.key);
2170 else
2171 static_key_disable(&tracepoint_printk_key.key);
2172
2173 out:
2174 mutex_unlock(&tracepoint_printk_mutex);
2175
2176 return ret;
2177 }
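
/*
 * Usage note (editor's addition): this handler backs the
 * kernel.tracepoint_printk sysctl, so mirroring tracepoints to
 * printk can be toggled at runtime, e.g.
 *
 *      sysctl kernel.tracepoint_printk=1
 *
 * which only sticks when the tp_printk boot parameter set up
 * tracepoint_print_iter.
 */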
2178
2179 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2180 {
2181 if (static_key_false(&tracepoint_printk_key.key))
2182 output_printk(fbuffer);
2183
2184 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2185 fbuffer->event, fbuffer->entry,
2186 fbuffer->flags, fbuffer->pc);
2187 }
2188 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2189
2190 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2191 struct ring_buffer *buffer,
2192 struct ring_buffer_event *event,
2193 unsigned long flags, int pc,
2194 struct pt_regs *regs)
2195 {
2196 __buffer_unlock_commit(buffer, event);
2197
2198 /*
2199 * If regs is not set, then skip the following callers:
2200 * trace_buffer_unlock_commit_regs
2201 * event_trigger_unlock_commit
2202 * trace_event_buffer_commit
2203 * trace_event_raw_event_sched_switch
2204 * Note, we can still get here via blktrace, wakeup tracer
2205 * and mmiotrace, but that's ok if they lose a function or
2206 * two. They are not that meaningful.
2207 */
2208 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2209 ftrace_trace_userstack(buffer, flags, pc);
2210 }
2211
2212 /*
2213 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2214 */
2215 void
2216 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2217 struct ring_buffer_event *event)
2218 {
2219 __buffer_unlock_commit(buffer, event);
2220 }
2221
2222 static void
2223 trace_process_export(struct trace_export *export,
2224 struct ring_buffer_event *event)
2225 {
2226 struct trace_entry *entry;
2227 unsigned int size;
2228
2229 entry = ring_buffer_event_data(event);
2230 size = ring_buffer_event_length(event);
2231 export->write(entry, size);
2232 }
2233
2234 static DEFINE_MUTEX(ftrace_export_lock);
2235
2236 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2237
2238 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2239
2240 static inline void ftrace_exports_enable(void)
2241 {
2242 static_branch_enable(&ftrace_exports_enabled);
2243 }
2244
2245 static inline void ftrace_exports_disable(void)
2246 {
2247 static_branch_disable(&ftrace_exports_enabled);
2248 }
2249
2250 void ftrace_exports(struct ring_buffer_event *event)
2251 {
2252 struct trace_export *export;
2253
2254 preempt_disable_notrace();
2255
2256 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2257 while (export) {
2258 trace_process_export(export, event);
2259 export = rcu_dereference_raw_notrace(export->next);
2260 }
2261
2262 preempt_enable_notrace();
2263 }
2264
2265 static inline void
2266 add_trace_export(struct trace_export **list, struct trace_export *export)
2267 {
2268 rcu_assign_pointer(export->next, *list);
2269 /*
2270 * We are inserting export into the list, but another
2271 * CPU might be walking that list. We need to make sure
2272 * the export->next pointer is valid before another CPU sees
2273 * the export pointer included in the list.
2274 */
2275 rcu_assign_pointer(*list, export);
2276 }
2277
2278 static inline int
2279 rm_trace_export(struct trace_export **list, struct trace_export *export)
2280 {
2281 struct trace_export **p;
2282
2283 for (p = list; *p != NULL; p = &(*p)->next)
2284 if (*p == export)
2285 break;
2286
2287 if (*p != export)
2288 return -1;
2289
2290 rcu_assign_pointer(*p, (*p)->next);
2291
2292 return 0;
2293 }
2294
2295 static inline void
2296 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2297 {
2298 if (*list == NULL)
2299 ftrace_exports_enable();
2300
2301 add_trace_export(list, export);
2302 }
2303
2304 static inline int
2305 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2306 {
2307 int ret;
2308
2309 ret = rm_trace_export(list, export);
2310 if (*list == NULL)
2311 ftrace_exports_disable();
2312
2313 return ret;
2314 }
2315
2316 int register_ftrace_export(struct trace_export *export)
2317 {
2318 if (WARN_ON_ONCE(!export->write))
2319 return -1;
2320
2321 mutex_lock(&ftrace_export_lock);
2322
2323 add_ftrace_export(&ftrace_exports_list, export);
2324
2325 mutex_unlock(&ftrace_export_lock);
2326
2327 return 0;
2328 }
2329 EXPORT_SYMBOL_GPL(register_ftrace_export);
2330
2331 int unregister_ftrace_export(struct trace_export *export)
2332 {
2333 int ret;
2334
2335 mutex_lock(&ftrace_export_lock);
2336
2337 ret = rm_ftrace_export(&ftrace_exports_list, export);
2338
2339 mutex_unlock(&ftrace_export_lock);
2340
2341 return ret;
2342 }
2343 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
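
/*
 * Minimal sketch of a trace_export user (editor's illustration;
 * my_export_write() and its backend are hypothetical). The write
 * callback receives the raw trace entry and its length, exactly as
 * handed over by trace_process_export() above.
 */
static void my_export_write(const void *data, unsigned int len)
{
        /* e.g. forward the entry to a device, network or firmware log */
}

static struct trace_export my_export = {
        .write  = my_export_write,
};

static __maybe_unused int my_export_init(void)
{
        return register_ftrace_export(&my_export);      /* 0 on success */
}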
2344
2345 void
2346 trace_function(struct trace_array *tr,
2347 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2348 int pc)
2349 {
2350 struct trace_event_call *call = &event_function;
2351 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2352 struct ring_buffer_event *event;
2353 struct ftrace_entry *entry;
2354
2355 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2356 flags, pc);
2357 if (!event)
2358 return;
2359 entry = ring_buffer_event_data(event);
2360 entry->ip = ip;
2361 entry->parent_ip = parent_ip;
2362
2363 if (!call_filter_check_discard(call, entry, buffer, event)) {
2364 if (static_branch_unlikely(&ftrace_exports_enabled))
2365 ftrace_exports(event);
2366 __buffer_unlock_commit(buffer, event);
2367 }
2368 }
2369
2370 #ifdef CONFIG_STACKTRACE
2371
2372 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2373 struct ftrace_stack {
2374 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2375 };
2376
2377 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2378 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2379
2380 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2381 unsigned long flags,
2382 int skip, int pc, struct pt_regs *regs)
2383 {
2384 struct trace_event_call *call = &event_kernel_stack;
2385 struct ring_buffer_event *event;
2386 struct stack_entry *entry;
2387 struct stack_trace trace;
2388 int use_stack;
2389 int size = FTRACE_STACK_ENTRIES;
2390
2391 trace.nr_entries = 0;
2392 trace.skip = skip;
2393
2394 /*
2395 * Add two, for this function and the call to save_stack_trace().
2396 * If regs is set, then these functions will not be in the way.
2397 */
2398 if (!regs)
2399 trace.skip += 2;
2400
2401 /*
2402 * Since events can happen in NMIs, there's no safe way to
2403 * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
2404 * or NMI comes in, it will just have to use the default
2405 * FTRACE_STACK_ENTRIES sized buffer in the event itself.
2406 */
2407 preempt_disable_notrace();
2408
2409 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2410 /*
2411 * We don't need any atomic variables, just a barrier.
2412 * If an interrupt comes in, we don't care, because it would
2413 * have exited and put the counter back to what we want.
2414 * We just need a barrier to keep gcc from moving things
2415 * around.
2416 */
2417 barrier();
2418 if (use_stack == 1) {
2419 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2420 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2421
2422 if (regs)
2423 save_stack_trace_regs(regs, &trace);
2424 else
2425 save_stack_trace(&trace);
2426
2427 if (trace.nr_entries > size)
2428 size = trace.nr_entries;
2429 } else
2430 /* From now on, use_stack is a boolean */
2431 use_stack = 0;
2432
2433 size *= sizeof(unsigned long);
2434
2435 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2436 sizeof(*entry) + size, flags, pc);
2437 if (!event)
2438 goto out;
2439 entry = ring_buffer_event_data(event);
2440
2441 memset(&entry->caller, 0, size);
2442
2443 if (use_stack)
2444 memcpy(&entry->caller, trace.entries,
2445 trace.nr_entries * sizeof(unsigned long));
2446 else {
2447 trace.max_entries = FTRACE_STACK_ENTRIES;
2448 trace.entries = entry->caller;
2449 if (regs)
2450 save_stack_trace_regs(regs, &trace);
2451 else
2452 save_stack_trace(&trace);
2453 }
2454
2455 entry->size = trace.nr_entries;
2456
2457 if (!call_filter_check_discard(call, entry, buffer, event))
2458 __buffer_unlock_commit(buffer, event);
2459
2460 out:
2461 /* Again, don't let gcc optimize things here */
2462 barrier();
2463 __this_cpu_dec(ftrace_stack_reserve);
2464 preempt_enable_notrace();
2465
2466 }
2467
2468 static inline void ftrace_trace_stack(struct trace_array *tr,
2469 struct ring_buffer *buffer,
2470 unsigned long flags,
2471 int skip, int pc, struct pt_regs *regs)
2472 {
2473 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2474 return;
2475
2476 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2477 }
2478
2479 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2480 int pc)
2481 {
2482 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2483 }
2484
2485 /**
2486 * trace_dump_stack - record a stack back trace in the trace buffer
2487 * @skip: Number of functions to skip (helper handlers)
2488 */
2489 void trace_dump_stack(int skip)
2490 {
2491 unsigned long flags;
2492
2493 if (tracing_disabled || tracing_selftest_running)
2494 return;
2495
2496 local_save_flags(flags);
2497
2498 /*
2499 * Skip 3 more; that seems to get us to the caller of
2500 * this function.
2501 */
2502 skip += 3;
2503 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2504 flags, skip, preempt_count(), NULL);
2505 }
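
/*
 * Usage sketch (editor's addition): any kernel code can record a
 * backtrace into the trace buffer instead of the console, e.g.
 *
 *      if (suspicious_condition)
 *              trace_dump_stack(0);
 *
 * which captures the caller's stack without the dmesg noise of
 * dump_stack().
 */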
2506
2507 static DEFINE_PER_CPU(int, user_stack_count);
2508
2509 void
2510 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2511 {
2512 struct trace_event_call *call = &event_user_stack;
2513 struct ring_buffer_event *event;
2514 struct userstack_entry *entry;
2515 struct stack_trace trace;
2516
2517 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2518 return;
2519
2520 /*
2521 * NMIs cannot handle page faults, even with fixups.
2522 * Saving the user stack can (and often does) fault.
2523 */
2524 if (unlikely(in_nmi()))
2525 return;
2526
2527 /*
2528 * Prevent recursion, since user stack tracing may
2529 * trigger other kernel events.
2530 */
2531 preempt_disable();
2532 if (__this_cpu_read(user_stack_count))
2533 goto out;
2534
2535 __this_cpu_inc(user_stack_count);
2536
2537 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2538 sizeof(*entry), flags, pc);
2539 if (!event)
2540 goto out_drop_count;
2541 entry = ring_buffer_event_data(event);
2542
2543 entry->tgid = current->tgid;
2544 memset(&entry->caller, 0, sizeof(entry->caller));
2545
2546 trace.nr_entries = 0;
2547 trace.max_entries = FTRACE_STACK_ENTRIES;
2548 trace.skip = 0;
2549 trace.entries = entry->caller;
2550
2551 save_stack_trace_user(&trace);
2552 if (!call_filter_check_discard(call, entry, buffer, event))
2553 __buffer_unlock_commit(buffer, event);
2554
2555 out_drop_count:
2556 __this_cpu_dec(user_stack_count);
2557 out:
2558 preempt_enable();
2559 }
2560
2561 #ifdef UNUSED
2562 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2563 {
2564 ftrace_trace_userstack(tr, flags, preempt_count());
2565 }
2566 #endif /* UNUSED */
2567
2568 #endif /* CONFIG_STACKTRACE */
2569
2570 /* created for use with alloc_percpu */
2571 struct trace_buffer_struct {
2572 int nesting;
2573 char buffer[4][TRACE_BUF_SIZE];
2574 };
2575
2576 static struct trace_buffer_struct *trace_percpu_buffer;
2577
2578 /*
2579 * This allows for lockless recording. If we're nested too deeply, then
2580 * this returns NULL.
2581 */
2582 static char *get_trace_buf(void)
2583 {
2584 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2585
2586 if (!buffer || buffer->nesting >= 4)
2587 return NULL;
2588
2589 return &buffer->buffer[buffer->nesting++][0];
2590 }
2591
2592 static void put_trace_buf(void)
2593 {
2594 this_cpu_dec(trace_percpu_buffer->nesting);
2595 }
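
/*
 * Balanced usage sketch (editor's illustration; example_use_trace_buf()
 * is hypothetical): get_trace_buf()/put_trace_buf() must pair up on
 * the same CPU, which is why the callers below bracket them with
 * preempt_disable_notrace()/preempt_enable_notrace().
 */
static __maybe_unused int example_use_trace_buf(const char *msg)
{
        char *tbuf;
        int len = 0;

        preempt_disable_notrace();
        tbuf = get_trace_buf();
        if (tbuf) {
                len = scnprintf(tbuf, TRACE_BUF_SIZE, "%s", msg);
                /* ... hand tbuf to the ring buffer here ... */
                put_trace_buf();
        }
        preempt_enable_notrace();
        return len;
}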
2596
2597 static int alloc_percpu_trace_buffer(void)
2598 {
2599 struct trace_buffer_struct *buffers;
2600
2601 buffers = alloc_percpu(struct trace_buffer_struct);
2602 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2603 return -ENOMEM;
2604
2605 trace_percpu_buffer = buffers;
2606 return 0;
2607 }
2608
2609 static int buffers_allocated;
2610
2611 void trace_printk_init_buffers(void)
2612 {
2613 if (buffers_allocated)
2614 return;
2615
2616 if (alloc_percpu_trace_buffer())
2617 return;
2618
2619 /* trace_printk() is for debug use only. Don't use it in production. */
2620
2621 pr_warn("\n");
2622 pr_warn("**********************************************************\n");
2623 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2624 pr_warn("** **\n");
2625 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2626 pr_warn("** **\n");
2627 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2628 pr_warn("** unsafe for production use. **\n");
2629 pr_warn("** **\n");
2630 pr_warn("** If you see this message and you are not debugging **\n");
2631 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2632 pr_warn("** **\n");
2633 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2634 pr_warn("**********************************************************\n");
2635
2636 /* Expand the buffers to set size */
2637 tracing_update_buffers();
2638
2639 buffers_allocated = 1;
2640
2641 /*
2642 * trace_printk_init_buffers() can be called by modules.
2643 * If that happens, then we need to start cmdline recording
2644 * directly here. If the global_trace.buffer is already
2645 * allocated here, then this was called by module code.
2646 */
2647 if (global_trace.trace_buffer.buffer)
2648 tracing_start_cmdline_record();
2649 }
2650
2651 void trace_printk_start_comm(void)
2652 {
2653 /* Start tracing comms if trace printk is set */
2654 if (!buffers_allocated)
2655 return;
2656 tracing_start_cmdline_record();
2657 }
2658
2659 static void trace_printk_start_stop_comm(int enabled)
2660 {
2661 if (!buffers_allocated)
2662 return;
2663
2664 if (enabled)
2665 tracing_start_cmdline_record();
2666 else
2667 tracing_stop_cmdline_record();
2668 }
2669
2670 /**
2671 * trace_vbprintk - write binary msg to tracing buffer
2672 * @ip: caller address, @fmt: format string, @args: arguments for @fmt
2673 */
2674 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2675 {
2676 struct trace_event_call *call = &event_bprint;
2677 struct ring_buffer_event *event;
2678 struct ring_buffer *buffer;
2679 struct trace_array *tr = &global_trace;
2680 struct bprint_entry *entry;
2681 unsigned long flags;
2682 char *tbuffer;
2683 int len = 0, size, pc;
2684
2685 if (unlikely(tracing_selftest_running || tracing_disabled))
2686 return 0;
2687
2688 /* Don't pollute graph traces with trace_vprintk internals */
2689 pause_graph_tracing();
2690
2691 pc = preempt_count();
2692 preempt_disable_notrace();
2693
2694 tbuffer = get_trace_buf();
2695 if (!tbuffer) {
2696 len = 0;
2697 goto out_nobuffer;
2698 }
2699
2700 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2701
2702 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2703 goto out;
2704
2705 local_save_flags(flags);
2706 size = sizeof(*entry) + sizeof(u32) * len;
2707 buffer = tr->trace_buffer.buffer;
2708 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2709 flags, pc);
2710 if (!event)
2711 goto out;
2712 entry = ring_buffer_event_data(event);
2713 entry->ip = ip;
2714 entry->fmt = fmt;
2715
2716 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2717 if (!call_filter_check_discard(call, entry, buffer, event)) {
2718 __buffer_unlock_commit(buffer, event);
2719 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2720 }
2721
2722 out:
2723 put_trace_buf();
2724
2725 out_nobuffer:
2726 preempt_enable_notrace();
2727 unpause_graph_tracing();
2728
2729 return len;
2730 }
2731 EXPORT_SYMBOL_GPL(trace_vbprintk);
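
/*
 * Editor's note: trace_vbprintk() is the back end of the
 * trace_printk() macro for formats with arguments; a typical debug
 * call site is
 *
 *      trace_printk("widget %d entered state %d\n", id, state);
 *
 * whose output lands in the trace buffer rather than in dmesg.
 */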
2732
2733 static int
2734 __trace_array_vprintk(struct ring_buffer *buffer,
2735 unsigned long ip, const char *fmt, va_list args)
2736 {
2737 struct trace_event_call *call = &event_print;
2738 struct ring_buffer_event *event;
2739 int len = 0, size, pc;
2740 struct print_entry *entry;
2741 unsigned long flags;
2742 char *tbuffer;
2743
2744 if (tracing_disabled || tracing_selftest_running)
2745 return 0;
2746
2747 /* Don't pollute graph traces with trace_vprintk internals */
2748 pause_graph_tracing();
2749
2750 pc = preempt_count();
2751 preempt_disable_notrace();
2752
2753
2754 tbuffer = get_trace_buf();
2755 if (!tbuffer) {
2756 len = 0;
2757 goto out_nobuffer;
2758 }
2759
2760 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2761
2762 local_save_flags(flags);
2763 size = sizeof(*entry) + len + 1;
2764 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2765 flags, pc);
2766 if (!event)
2767 goto out;
2768 entry = ring_buffer_event_data(event);
2769 entry->ip = ip;
2770
2771 memcpy(&entry->buf, tbuffer, len + 1);
2772 if (!call_filter_check_discard(call, entry, buffer, event)) {
2773 __buffer_unlock_commit(buffer, event);
2774 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2775 }
2776
2777 out:
2778 put_trace_buf();
2779
2780 out_nobuffer:
2781 preempt_enable_notrace();
2782 unpause_graph_tracing();
2783
2784 return len;
2785 }
2786
2787 int trace_array_vprintk(struct trace_array *tr,
2788 unsigned long ip, const char *fmt, va_list args)
2789 {
2790 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2791 }
2792
2793 int trace_array_printk(struct trace_array *tr,
2794 unsigned long ip, const char *fmt, ...)
2795 {
2796 int ret;
2797 va_list ap;
2798
2799 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2800 return 0;
2801
2802 va_start(ap, fmt);
2803 ret = trace_array_vprintk(tr, ip, fmt, ap);
2804 va_end(ap);
2805 return ret;
2806 }
2807
2808 int trace_array_printk_buf(struct ring_buffer *buffer,
2809 unsigned long ip, const char *fmt, ...)
2810 {
2811 int ret;
2812 va_list ap;
2813
2814 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2815 return 0;
2816
2817 va_start(ap, fmt);
2818 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2819 va_end(ap);
2820 return ret;
2821 }
2822
2823 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2824 {
2825 return trace_array_vprintk(&global_trace, ip, fmt, args);
2826 }
2827 EXPORT_SYMBOL_GPL(trace_vprintk);
2828
2829 static void trace_iterator_increment(struct trace_iterator *iter)
2830 {
2831 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2832
2833 iter->idx++;
2834 if (buf_iter)
2835 ring_buffer_read(buf_iter, NULL);
2836 }
2837
2838 static struct trace_entry *
2839 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2840 unsigned long *lost_events)
2841 {
2842 struct ring_buffer_event *event;
2843 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2844
2845 if (buf_iter)
2846 event = ring_buffer_iter_peek(buf_iter, ts);
2847 else
2848 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2849 lost_events);
2850
2851 if (event) {
2852 iter->ent_size = ring_buffer_event_length(event);
2853 return ring_buffer_event_data(event);
2854 }
2855 iter->ent_size = 0;
2856 return NULL;
2857 }
2858
2859 static struct trace_entry *
2860 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2861 unsigned long *missing_events, u64 *ent_ts)
2862 {
2863 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2864 struct trace_entry *ent, *next = NULL;
2865 unsigned long lost_events = 0, next_lost = 0;
2866 int cpu_file = iter->cpu_file;
2867 u64 next_ts = 0, ts;
2868 int next_cpu = -1;
2869 int next_size = 0;
2870 int cpu;
2871
2872 /*
2873 * If we are in a per_cpu trace file, don't bother iterating over
2874 * all CPUs; peek at that cpu directly.
2875 */
2876 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2877 if (ring_buffer_empty_cpu(buffer, cpu_file))
2878 return NULL;
2879 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2880 if (ent_cpu)
2881 *ent_cpu = cpu_file;
2882
2883 return ent;
2884 }
2885
2886 for_each_tracing_cpu(cpu) {
2887
2888 if (ring_buffer_empty_cpu(buffer, cpu))
2889 continue;
2890
2891 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2892
2893 /*
2894 * Pick the entry with the smallest timestamp:
2895 */
2896 if (ent && (!next || ts < next_ts)) {
2897 next = ent;
2898 next_cpu = cpu;
2899 next_ts = ts;
2900 next_lost = lost_events;
2901 next_size = iter->ent_size;
2902 }
2903 }
2904
2905 iter->ent_size = next_size;
2906
2907 if (ent_cpu)
2908 *ent_cpu = next_cpu;
2909
2910 if (ent_ts)
2911 *ent_ts = next_ts;
2912
2913 if (missing_events)
2914 *missing_events = next_lost;
2915
2916 return next;
2917 }
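
/*
 * Editor's note: the loop above is a k-way merge by timestamp. With
 * CPU0 holding events at t=5 and t=9 and CPU1 holding one at t=7,
 * three successive calls of trace_find_next_entry_inc() return the
 * t=5, t=7 and t=9 entries, so the per-cpu buffers read as a single
 * time-ordered stream.
 */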
2918
2919 /* Find the next real entry, without updating the iterator itself */
2920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2921 int *ent_cpu, u64 *ent_ts)
2922 {
2923 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2924 }
2925
2926 /* Find the next real entry, and increment the iterator to the next entry */
2927 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2928 {
2929 iter->ent = __find_next_entry(iter, &iter->cpu,
2930 &iter->lost_events, &iter->ts);
2931
2932 if (iter->ent)
2933 trace_iterator_increment(iter);
2934
2935 return iter->ent ? iter : NULL;
2936 }
2937
2938 static void trace_consume(struct trace_iterator *iter)
2939 {
2940 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2941 &iter->lost_events);
2942 }
2943
2944 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2945 {
2946 struct trace_iterator *iter = m->private;
2947 int i = (int)*pos;
2948 void *ent;
2949
2950 WARN_ON_ONCE(iter->leftover);
2951
2952 (*pos)++;
2953
2954 /* can't go backwards */
2955 if (iter->idx > i)
2956 return NULL;
2957
2958 if (iter->idx < 0)
2959 ent = trace_find_next_entry_inc(iter);
2960 else
2961 ent = iter;
2962
2963 while (ent && iter->idx < i)
2964 ent = trace_find_next_entry_inc(iter);
2965
2966 iter->pos = *pos;
2967
2968 return ent;
2969 }
2970
2971 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2972 {
2973 struct ring_buffer_event *event;
2974 struct ring_buffer_iter *buf_iter;
2975 unsigned long entries = 0;
2976 u64 ts;
2977
2978 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2979
2980 buf_iter = trace_buffer_iter(iter, cpu);
2981 if (!buf_iter)
2982 return;
2983
2984 ring_buffer_iter_reset(buf_iter);
2985
2986 /*
2987 * With the max latency tracers, a reset may never have
2988 * taken place on a cpu. This is evident when the timestamp
2989 * is before the start of the buffer.
2990 */
2991 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2992 if (ts >= iter->trace_buffer->time_start)
2993 break;
2994 entries++;
2995 ring_buffer_read(buf_iter, NULL);
2996 }
2997
2998 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2999 }
3000
3001 /*
3002 * The current tracer is copied to avoid taking a global lock
3003 * all around.
3004 */
3005 static void *s_start(struct seq_file *m, loff_t *pos)
3006 {
3007 struct trace_iterator *iter = m->private;
3008 struct trace_array *tr = iter->tr;
3009 int cpu_file = iter->cpu_file;
3010 void *p = NULL;
3011 loff_t l = 0;
3012 int cpu;
3013
3014 /*
3015 * Copy the tracer to avoid using a global lock all around.
3016 * iter->trace is a copy of current_trace; the name pointer
3017 * may be compared instead of using strcmp(), as iter->trace->name
3018 * will point to the same string as current_trace->name.
3019 */
3020 mutex_lock(&trace_types_lock);
3021 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3022 *iter->trace = *tr->current_trace;
3023 mutex_unlock(&trace_types_lock);
3024
3025 #ifdef CONFIG_TRACER_MAX_TRACE
3026 if (iter->snapshot && iter->trace->use_max_tr)
3027 return ERR_PTR(-EBUSY);
3028 #endif
3029
3030 if (!iter->snapshot)
3031 atomic_inc(&trace_record_cmdline_disabled);
3032
3033 if (*pos != iter->pos) {
3034 iter->ent = NULL;
3035 iter->cpu = 0;
3036 iter->idx = -1;
3037
3038 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3039 for_each_tracing_cpu(cpu)
3040 tracing_iter_reset(iter, cpu);
3041 } else
3042 tracing_iter_reset(iter, cpu_file);
3043
3044 iter->leftover = 0;
3045 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3046 ;
3047
3048 } else {
3049 /*
3050 * If we overflowed the seq_file before, then we want
3051 * to just reuse the trace_seq buffer again.
3052 */
3053 if (iter->leftover)
3054 p = iter;
3055 else {
3056 l = *pos - 1;
3057 p = s_next(m, p, &l);
3058 }
3059 }
3060
3061 trace_event_read_lock();
3062 trace_access_lock(cpu_file);
3063 return p;
3064 }
3065
3066 static void s_stop(struct seq_file *m, void *p)
3067 {
3068 struct trace_iterator *iter = m->private;
3069
3070 #ifdef CONFIG_TRACER_MAX_TRACE
3071 if (iter->snapshot && iter->trace->use_max_tr)
3072 return;
3073 #endif
3074
3075 if (!iter->snapshot)
3076 atomic_dec(&trace_record_cmdline_disabled);
3077
3078 trace_access_unlock(iter->cpu_file);
3079 trace_event_read_unlock();
3080 }
3081
3082 static void
3083 get_total_entries(struct trace_buffer *buf,
3084 unsigned long *total, unsigned long *entries)
3085 {
3086 unsigned long count;
3087 int cpu;
3088
3089 *total = 0;
3090 *entries = 0;
3091
3092 for_each_tracing_cpu(cpu) {
3093 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3094 /*
3095 * If this buffer has skipped entries, then we hold all
3096 * entries for the trace and we need to ignore the
3097 * ones before the time stamp.
3098 */
3099 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3100 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3101 /* total is the same as the entries */
3102 *total += count;
3103 } else
3104 *total += count +
3105 ring_buffer_overrun_cpu(buf->buffer, cpu);
3106 *entries += count;
3107 }
3108 }
3109
3110 static void print_lat_help_header(struct seq_file *m)
3111 {
3112 seq_puts(m, "#                  _------=> CPU#            \n"
3113 "#                 / _-----=> irqs-off        \n"
3114 "#                | / _----=> need-resched    \n"
3115 "#                || / _---=> hardirq/softirq \n"
3116 "#                ||| / _--=> preempt-depth   \n"
3117 "#                |||| /     delay            \n"
3118 "#  cmd     pid   ||||| time  |   caller      \n"
3119 "#     \\   /      |||||  \\    |   /         \n");
3120 }
3121
3122 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3123 {
3124 unsigned long total;
3125 unsigned long entries;
3126
3127 get_total_entries(buf, &total, &entries);
3128 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3129 entries, total, num_online_cpus());
3130 seq_puts(m, "#\n");
3131 }
3132
3133 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3134 {
3135 print_event_info(buf, m);
3136 seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3137 "#              | |       |          |         |\n");
3138 }
3139
3140 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3141 {
3142 print_event_info(buf, m);
3143 seq_puts(m, "#                              _-----=> irqs-off\n"
3144 "#                             / _----=> need-resched\n"
3145 "#                            | / _---=> hardirq/softirq\n"
3146 "#                            || / _--=> preempt-depth\n"
3147 "#                            ||| /     delay\n"
3148 "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3149 "#              | |       |   ||||       |         |\n");
3150 }
3151
3152 void
3153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3154 {
3155 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3156 struct trace_buffer *buf = iter->trace_buffer;
3157 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3158 struct tracer *type = iter->trace;
3159 unsigned long entries;
3160 unsigned long total;
3161 const char *name = "preemption";
3162
3163 name = type->name;
3164
3165 get_total_entries(buf, &total, &entries);
3166
3167 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3168 name, UTS_RELEASE);
3169 seq_puts(m, "# -----------------------------------"
3170 "---------------------------------\n");
3171 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3172 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3173 nsecs_to_usecs(data->saved_latency),
3174 entries,
3175 total,
3176 buf->cpu,
3177 #if defined(CONFIG_PREEMPT_NONE)
3178 "server",
3179 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3180 "desktop",
3181 #elif defined(CONFIG_PREEMPT)
3182 "preempt",
3183 #else
3184 "unknown",
3185 #endif
3186 /* These are reserved for later use */
3187 0, 0, 0, 0);
3188 #ifdef CONFIG_SMP
3189 seq_printf(m, " #P:%d)\n", num_online_cpus());
3190 #else
3191 seq_puts(m, ")\n");
3192 #endif
3193 seq_puts(m, "# -----------------\n");
3194 seq_printf(m, "# | task: %.16s-%d "
3195 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3196 data->comm, data->pid,
3197 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3198 data->policy, data->rt_priority);
3199 seq_puts(m, "# -----------------\n");
3200
3201 if (data->critical_start) {
3202 seq_puts(m, "# => started at: ");
3203 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3204 trace_print_seq(m, &iter->seq);
3205 seq_puts(m, "\n# => ended at: ");
3206 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3207 trace_print_seq(m, &iter->seq);
3208 seq_puts(m, "\n#\n");
3209 }
3210
3211 seq_puts(m, "#\n");
3212 }
3213
3214 static void test_cpu_buff_start(struct trace_iterator *iter)
3215 {
3216 struct trace_seq *s = &iter->seq;
3217 struct trace_array *tr = iter->tr;
3218
3219 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3220 return;
3221
3222 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3223 return;
3224
3225 if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3226 return;
3227
3228 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3229 return;
3230
3231 if (iter->started)
3232 cpumask_set_cpu(iter->cpu, iter->started);
3233
3234 /* Don't print started cpu buffer for the first entry of the trace */
3235 if (iter->idx > 1)
3236 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3237 iter->cpu);
3238 }
3239
3240 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3241 {
3242 struct trace_array *tr = iter->tr;
3243 struct trace_seq *s = &iter->seq;
3244 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3245 struct trace_entry *entry;
3246 struct trace_event *event;
3247
3248 entry = iter->ent;
3249
3250 test_cpu_buff_start(iter);
3251
3252 event = ftrace_find_event(entry->type);
3253
3254 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3255 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3256 trace_print_lat_context(iter);
3257 else
3258 trace_print_context(iter);
3259 }
3260
3261 if (trace_seq_has_overflowed(s))
3262 return TRACE_TYPE_PARTIAL_LINE;
3263
3264 if (event)
3265 return event->funcs->trace(iter, sym_flags, event);
3266
3267 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3268
3269 return trace_handle_return(s);
3270 }
3271
3272 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3273 {
3274 struct trace_array *tr = iter->tr;
3275 struct trace_seq *s = &iter->seq;
3276 struct trace_entry *entry;
3277 struct trace_event *event;
3278
3279 entry = iter->ent;
3280
3281 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3282 trace_seq_printf(s, "%d %d %llu ",
3283 entry->pid, iter->cpu, iter->ts);
3284
3285 if (trace_seq_has_overflowed(s))
3286 return TRACE_TYPE_PARTIAL_LINE;
3287
3288 event = ftrace_find_event(entry->type);
3289 if (event)
3290 return event->funcs->raw(iter, 0, event);
3291
3292 trace_seq_printf(s, "%d ?\n", entry->type);
3293
3294 return trace_handle_return(s);
3295 }
3296
3297 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3298 {
3299 struct trace_array *tr = iter->tr;
3300 struct trace_seq *s = &iter->seq;
3301 unsigned char newline = '\n';
3302 struct trace_entry *entry;
3303 struct trace_event *event;
3304
3305 entry = iter->ent;
3306
3307 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3308 SEQ_PUT_HEX_FIELD(s, entry->pid);
3309 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3310 SEQ_PUT_HEX_FIELD(s, iter->ts);
3311 if (trace_seq_has_overflowed(s))
3312 return TRACE_TYPE_PARTIAL_LINE;
3313 }
3314
3315 event = ftrace_find_event(entry->type);
3316 if (event) {
3317 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3318 if (ret != TRACE_TYPE_HANDLED)
3319 return ret;
3320 }
3321
3322 SEQ_PUT_FIELD(s, newline);
3323
3324 return trace_handle_return(s);
3325 }
3326
3327 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3328 {
3329 struct trace_array *tr = iter->tr;
3330 struct trace_seq *s = &iter->seq;
3331 struct trace_entry *entry;
3332 struct trace_event *event;
3333
3334 entry = iter->ent;
3335
3336 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3337 SEQ_PUT_FIELD(s, entry->pid);
3338 SEQ_PUT_FIELD(s, iter->cpu);
3339 SEQ_PUT_FIELD(s, iter->ts);
3340 if (trace_seq_has_overflowed(s))
3341 return TRACE_TYPE_PARTIAL_LINE;
3342 }
3343
3344 event = ftrace_find_event(entry->type);
3345 return event ? event->funcs->binary(iter, 0, event) :
3346 TRACE_TYPE_HANDLED;
3347 }
3348
3349 int trace_empty(struct trace_iterator *iter)
3350 {
3351 struct ring_buffer_iter *buf_iter;
3352 int cpu;
3353
3354 /* If we are looking at one CPU buffer, only check that one */
3355 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3356 cpu = iter->cpu_file;
3357 buf_iter = trace_buffer_iter(iter, cpu);
3358 if (buf_iter) {
3359 if (!ring_buffer_iter_empty(buf_iter))
3360 return 0;
3361 } else {
3362 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3363 return 0;
3364 }
3365 return 1;
3366 }
3367
3368 for_each_tracing_cpu(cpu) {
3369 buf_iter = trace_buffer_iter(iter, cpu);
3370 if (buf_iter) {
3371 if (!ring_buffer_iter_empty(buf_iter))
3372 return 0;
3373 } else {
3374 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3375 return 0;
3376 }
3377 }
3378
3379 return 1;
3380 }
3381
3382 /* Called with trace_event_read_lock() held. */
3383 enum print_line_t print_trace_line(struct trace_iterator *iter)
3384 {
3385 struct trace_array *tr = iter->tr;
3386 unsigned long trace_flags = tr->trace_flags;
3387 enum print_line_t ret;
3388
3389 if (iter->lost_events) {
3390 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3391 iter->cpu, iter->lost_events);
3392 if (trace_seq_has_overflowed(&iter->seq))
3393 return TRACE_TYPE_PARTIAL_LINE;
3394 }
3395
3396 if (iter->trace && iter->trace->print_line) {
3397 ret = iter->trace->print_line(iter);
3398 if (ret != TRACE_TYPE_UNHANDLED)
3399 return ret;
3400 }
3401
3402 if (iter->ent->type == TRACE_BPUTS &&
3403 trace_flags & TRACE_ITER_PRINTK &&
3404 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3405 return trace_print_bputs_msg_only(iter);
3406
3407 if (iter->ent->type == TRACE_BPRINT &&
3408 trace_flags & TRACE_ITER_PRINTK &&
3409 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3410 return trace_print_bprintk_msg_only(iter);
3411
3412 if (iter->ent->type == TRACE_PRINT &&
3413 trace_flags & TRACE_ITER_PRINTK &&
3414 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3415 return trace_print_printk_msg_only(iter);
3416
3417 if (trace_flags & TRACE_ITER_BIN)
3418 return print_bin_fmt(iter);
3419
3420 if (trace_flags & TRACE_ITER_HEX)
3421 return print_hex_fmt(iter);
3422
3423 if (trace_flags & TRACE_ITER_RAW)
3424 return print_raw_fmt(iter);
3425
3426 return print_trace_fmt(iter);
3427 }
3428
3429 void trace_latency_header(struct seq_file *m)
3430 {
3431 struct trace_iterator *iter = m->private;
3432 struct trace_array *tr = iter->tr;
3433
3434 /* print nothing if the buffers are empty */
3435 if (trace_empty(iter))
3436 return;
3437
3438 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3439 print_trace_header(m, iter);
3440
3441 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3442 print_lat_help_header(m);
3443 }
3444
3445 void trace_default_header(struct seq_file *m)
3446 {
3447 struct trace_iterator *iter = m->private;
3448 struct trace_array *tr = iter->tr;
3449 unsigned long trace_flags = tr->trace_flags;
3450
3451 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3452 return;
3453
3454 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3455 /* print nothing if the buffers are empty */
3456 if (trace_empty(iter))
3457 return;
3458 print_trace_header(m, iter);
3459 if (!(trace_flags & TRACE_ITER_VERBOSE))
3460 print_lat_help_header(m);
3461 } else {
3462 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3463 if (trace_flags & TRACE_ITER_IRQ_INFO)
3464 print_func_help_header_irq(iter->trace_buffer, m);
3465 else
3466 print_func_help_header(iter->trace_buffer, m);
3467 }
3468 }
3469 }
3470
3471 static void test_ftrace_alive(struct seq_file *m)
3472 {
3473 if (!ftrace_is_dead())
3474 return;
3475 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3476 "# MAY BE MISSING FUNCTION EVENTS\n");
3477 }
3478
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 static void show_snapshot_main_help(struct seq_file *m)
3481 {
3482 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3483 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3484 "# Takes a snapshot of the main buffer.\n"
3485 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3486 "# (Doesn't have to be '2' works with any number that\n"
3487 "# is not a '0' or '1')\n");
3488 }
3489
3490 static void show_snapshot_percpu_help(struct seq_file *m)
3491 {
3492 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3494 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3495 "# Takes a snapshot of the main buffer for this cpu.\n");
3496 #else
3497 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3498 "# Must use main snapshot file to allocate.\n");
3499 #endif
3500 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3501 "# (Doesn't have to be '2' works with any number that\n"
3502 "# is not a '0' or '1')\n");
3503 }
3504
3505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3506 {
3507 if (iter->tr->allocated_snapshot)
3508 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3509 else
3510 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3511
3512 seq_puts(m, "# Snapshot commands:\n");
3513 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3514 show_snapshot_main_help(m);
3515 else
3516 show_snapshot_percpu_help(m);
3517 }
3518 #else
3519 /* Should never be called */
3520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3521 #endif
3522
3523 static int s_show(struct seq_file *m, void *v)
3524 {
3525 struct trace_iterator *iter = v;
3526 int ret;
3527
3528 if (iter->ent == NULL) {
3529 if (iter->tr) {
3530 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3531 seq_puts(m, "#\n");
3532 test_ftrace_alive(m);
3533 }
3534 if (iter->snapshot && trace_empty(iter))
3535 print_snapshot_help(m, iter);
3536 else if (iter->trace && iter->trace->print_header)
3537 iter->trace->print_header(m);
3538 else
3539 trace_default_header(m);
3540
3541 } else if (iter->leftover) {
3542 /*
3543 * If we filled the seq_file buffer earlier, we
3544 * want to just show it now.
3545 */
3546 ret = trace_print_seq(m, &iter->seq);
3547
3548 /* ret should this time be zero, but you never know */
3549 iter->leftover = ret;
3550
3551 } else {
3552 print_trace_line(iter);
3553 ret = trace_print_seq(m, &iter->seq);
3554 /*
3555 * If we overflow the seq_file buffer, then it will
3556 * ask us for this data again at start up.
3557 * Use that instead.
3558 * ret is 0 if seq_file write succeeded.
3559 * -1 otherwise.
3560 */
3561 iter->leftover = ret;
3562 }
3563
3564 return 0;
3565 }
3566
3567 /*
3568 * Should be used after trace_array_get(); trace_types_lock
3569 * ensures that i_cdev was already initialized.
3570 */
3571 static inline int tracing_get_cpu(struct inode *inode)
3572 {
3573 if (inode->i_cdev) /* See trace_create_cpu_file() */
3574 return (long)inode->i_cdev - 1;
3575 return RING_BUFFER_ALL_CPUS;
3576 }
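
/*
 * Editor's note: the matching encode side (trace_create_cpu_file(),
 * not shown in this excerpt) presumably stores cpu + 1 in i_cdev so
 * that a NULL i_cdev can keep meaning "all CPUs"; the - 1 above
 * undoes that bias.
 */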
3577
3578 static const struct seq_operations tracer_seq_ops = {
3579 .start = s_start,
3580 .next = s_next,
3581 .stop = s_stop,
3582 .show = s_show,
3583 };
3584
3585 static struct trace_iterator *
3586 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3587 {
3588 struct trace_array *tr = inode->i_private;
3589 struct trace_iterator *iter;
3590 int cpu;
3591
3592 if (tracing_disabled)
3593 return ERR_PTR(-ENODEV);
3594
3595 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3596 if (!iter)
3597 return ERR_PTR(-ENOMEM);
3598
3599 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3600 GFP_KERNEL);
3601 if (!iter->buffer_iter)
3602 goto release;
3603
3604 /*
3605 * We make a copy of the current tracer to avoid concurrent
3606 * changes to it while we are reading.
3607 */
3608 mutex_lock(&trace_types_lock);
3609 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3610 if (!iter->trace)
3611 goto fail;
3612
3613 *iter->trace = *tr->current_trace;
3614
3615 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3616 goto fail;
3617
3618 iter->tr = tr;
3619
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621 /* Currently only the top directory has a snapshot */
3622 if (tr->current_trace->print_max || snapshot)
3623 iter->trace_buffer = &tr->max_buffer;
3624 else
3625 #endif
3626 iter->trace_buffer = &tr->trace_buffer;
3627 iter->snapshot = snapshot;
3628 iter->pos = -1;
3629 iter->cpu_file = tracing_get_cpu(inode);
3630 mutex_init(&iter->mutex);
3631
3632 /* Notify the tracer early; before we stop tracing. */
3633 if (iter->trace && iter->trace->open)
3634 iter->trace->open(iter);
3635
3636 /* Annotate start of buffers if we had overruns */
3637 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3638 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3639
3640 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3641 if (trace_clocks[tr->clock_id].in_ns)
3642 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3643
3644 /* stop the trace while dumping if we are not opening "snapshot" */
3645 if (!iter->snapshot)
3646 tracing_stop_tr(tr);
3647
3648 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3649 for_each_tracing_cpu(cpu) {
3650 iter->buffer_iter[cpu] =
3651 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3652 }
3653 ring_buffer_read_prepare_sync();
3654 for_each_tracing_cpu(cpu) {
3655 ring_buffer_read_start(iter->buffer_iter[cpu]);
3656 tracing_iter_reset(iter, cpu);
3657 }
3658 } else {
3659 cpu = iter->cpu_file;
3660 iter->buffer_iter[cpu] =
3661 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3662 ring_buffer_read_prepare_sync();
3663 ring_buffer_read_start(iter->buffer_iter[cpu]);
3664 tracing_iter_reset(iter, cpu);
3665 }
3666
3667 mutex_unlock(&trace_types_lock);
3668
3669 return iter;
3670
3671 fail:
3672 mutex_unlock(&trace_types_lock);
3673 kfree(iter->trace);
3674 kfree(iter->buffer_iter);
3675 release:
3676 seq_release_private(inode, file);
3677 return ERR_PTR(-ENOMEM);
3678 }
3679
3680 int tracing_open_generic(struct inode *inode, struct file *filp)
3681 {
3682 if (tracing_disabled)
3683 return -ENODEV;
3684
3685 filp->private_data = inode->i_private;
3686 return 0;
3687 }
3688
3689 bool tracing_is_disabled(void)
3690 {
3691 return tracing_disabled;
3692 }
3693
3694 /*
3695 * Open and update trace_array ref count.
3696 * Must have the current trace_array passed to it.
3697 */
3698 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3699 {
3700 struct trace_array *tr = inode->i_private;
3701
3702 if (tracing_disabled)
3703 return -ENODEV;
3704
3705 if (trace_array_get(tr) < 0)
3706 return -ENODEV;
3707
3708 filp->private_data = inode->i_private;
3709
3710 return 0;
3711 }
3712
3713 static int tracing_release(struct inode *inode, struct file *file)
3714 {
3715 struct trace_array *tr = inode->i_private;
3716 struct seq_file *m = file->private_data;
3717 struct trace_iterator *iter;
3718 int cpu;
3719
3720 if (!(file->f_mode & FMODE_READ)) {
3721 trace_array_put(tr);
3722 return 0;
3723 }
3724
3725 /* Writes do not use seq_file */
3726 iter = m->private;
3727 mutex_lock(&trace_types_lock);
3728
3729 for_each_tracing_cpu(cpu) {
3730 if (iter->buffer_iter[cpu])
3731 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3732 }
3733
3734 if (iter->trace && iter->trace->close)
3735 iter->trace->close(iter);
3736
3737 if (!iter->snapshot)
3738 /* reenable tracing if it was previously enabled */
3739 tracing_start_tr(tr);
3740
3741 __trace_array_put(tr);
3742
3743 mutex_unlock(&trace_types_lock);
3744
3745 mutex_destroy(&iter->mutex);
3746 free_cpumask_var(iter->started);
3747 kfree(iter->trace);
3748 kfree(iter->buffer_iter);
3749 seq_release_private(inode, file);
3750
3751 return 0;
3752 }
3753
3754 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3755 {
3756 struct trace_array *tr = inode->i_private;
3757
3758 trace_array_put(tr);
3759 return 0;
3760 }
3761
3762 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3763 {
3764 struct trace_array *tr = inode->i_private;
3765
3766 trace_array_put(tr);
3767
3768 return single_release(inode, file);
3769 }
3770
3771 static int tracing_open(struct inode *inode, struct file *file)
3772 {
3773 struct trace_array *tr = inode->i_private;
3774 struct trace_iterator *iter;
3775 int ret = 0;
3776
3777 if (trace_array_get(tr) < 0)
3778 return -ENODEV;
3779
3780 /* If this file was open for write, then erase contents */
3781 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3782 int cpu = tracing_get_cpu(inode);
3783
3784 if (cpu == RING_BUFFER_ALL_CPUS)
3785 tracing_reset_online_cpus(&tr->trace_buffer);
3786 else
3787 tracing_reset(&tr->trace_buffer, cpu);
3788 }
3789
3790 if (file->f_mode & FMODE_READ) {
3791 iter = __tracing_open(inode, file, false);
3792 if (IS_ERR(iter))
3793 ret = PTR_ERR(iter);
3794 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3795 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3796 }
3797
3798 if (ret < 0)
3799 trace_array_put(tr);
3800
3801 return ret;
3802 }
3803
3804 /*
3805 * Some tracers are not suitable for instance buffers.
3806 * A tracer is always available for the global array (toplevel)
3807 * or if it explicitly states that it is.
3808 */
3809 static bool
3810 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3811 {
3812 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3813 }
3814
3815 /* Find the next tracer that this trace array may use */
3816 static struct tracer *
3817 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3818 {
3819 while (t && !trace_ok_for_array(t, tr))
3820 t = t->next;
3821
3822 return t;
3823 }
3824
3825 static void *
3826 t_next(struct seq_file *m, void *v, loff_t *pos)
3827 {
3828 struct trace_array *tr = m->private;
3829 struct tracer *t = v;
3830
3831 (*pos)++;
3832
3833 if (t)
3834 t = get_tracer_for_array(tr, t->next);
3835
3836 return t;
3837 }
3838
3839 static void *t_start(struct seq_file *m, loff_t *pos)
3840 {
3841 struct trace_array *tr = m->private;
3842 struct tracer *t;
3843 loff_t l = 0;
3844
3845 mutex_lock(&trace_types_lock);
3846
3847 t = get_tracer_for_array(tr, trace_types);
3848 for (; t && l < *pos; t = t_next(m, t, &l))
3849 ;
3850
3851 return t;
3852 }
3853
3854 static void t_stop(struct seq_file *m, void *p)
3855 {
3856 mutex_unlock(&trace_types_lock);
3857 }
3858
3859 static int t_show(struct seq_file *m, void *v)
3860 {
3861 struct tracer *t = v;
3862
3863 if (!t)
3864 return 0;
3865
3866 seq_puts(m, t->name);
3867 if (t->next)
3868 seq_putc(m, ' ');
3869 else
3870 seq_putc(m, '\n');
3871
3872 return 0;
3873 }
3874
3875 static const struct seq_operations show_traces_seq_ops = {
3876 .start = t_start,
3877 .next = t_next,
3878 .stop = t_stop,
3879 .show = t_show,
3880 };
3881
3882 static int show_traces_open(struct inode *inode, struct file *file)
3883 {
3884 struct trace_array *tr = inode->i_private;
3885 struct seq_file *m;
3886 int ret;
3887
3888 if (tracing_disabled)
3889 return -ENODEV;
3890
3891 ret = seq_open(file, &show_traces_seq_ops);
3892 if (ret)
3893 return ret;
3894
3895 m = file->private_data;
3896 m->private = tr;
3897
3898 return 0;
3899 }
3900
3901 static ssize_t
3902 tracing_write_stub(struct file *filp, const char __user *ubuf,
3903 size_t count, loff_t *ppos)
3904 {
3905 return count;
3906 }
3907
3908 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3909 {
3910 int ret;
3911
3912 if (file->f_mode & FMODE_READ)
3913 ret = seq_lseek(file, offset, whence);
3914 else
3915 file->f_pos = ret = 0;
3916
3917 return ret;
3918 }
3919
3920 static const struct file_operations tracing_fops = {
3921 .open = tracing_open,
3922 .read = seq_read,
3923 .write = tracing_write_stub,
3924 .llseek = tracing_lseek,
3925 .release = tracing_release,
3926 };
3927
3928 static const struct file_operations show_traces_fops = {
3929 .open = show_traces_open,
3930 .read = seq_read,
3931 .release = seq_release,
3932 .llseek = seq_lseek,
3933 };
3934
3935 /*
3936 * The tracer itself will not take this lock, but we still want
3937 * to provide a consistent cpumask to user-space:
3938 */
3939 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3940
3941 /*
3942 * Temporary storage for the character representation of the
3943 * CPU bitmask (and one more byte for the newline):
3944 */
3945 static char mask_str[NR_CPUS + 1];
3946
3947 static ssize_t
3948 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3949 size_t count, loff_t *ppos)
3950 {
3951 struct trace_array *tr = file_inode(filp)->i_private;
3952 int len;
3953
3954 mutex_lock(&tracing_cpumask_update_lock);
3955
3956 len = snprintf(mask_str, count, "%*pb\n",
3957 cpumask_pr_args(tr->tracing_cpumask));
3958 if (len >= count) {
3959 count = -EINVAL;
3960 goto out_err;
3961 }
3962 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3963
3964 out_err:
3965 mutex_unlock(&tracing_cpumask_update_lock);
3966
3967 return count;
3968 }
3969
3970 static ssize_t
3971 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3972 size_t count, loff_t *ppos)
3973 {
3974 struct trace_array *tr = file_inode(filp)->i_private;
3975 cpumask_var_t tracing_cpumask_new;
3976 int err, cpu;
3977
3978 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3979 return -ENOMEM;
3980
3981 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3982 if (err)
3983 goto err_unlock;
3984
3985 mutex_lock(&tracing_cpumask_update_lock);
3986
3987 local_irq_disable();
3988 arch_spin_lock(&tr->max_lock);
3989 for_each_tracing_cpu(cpu) {
3990 /*
3991 * Increase/decrease the disabled counter if we are
3992 * about to flip a bit in the cpumask:
3993 */
3994 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3995 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3996 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3997 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3998 }
3999 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4000 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4001 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4002 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4003 }
4004 }
4005 arch_spin_unlock(&tr->max_lock);
4006 local_irq_enable();
4007
4008 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4009
4010 mutex_unlock(&tracing_cpumask_update_lock);
4011 free_cpumask_var(tracing_cpumask_new);
4012
4013 return count;
4014
4015 err_unlock:
4016 free_cpumask_var(tracing_cpumask_new);
4017
4018 return err;
4019 }
4020
4021 static const struct file_operations tracing_cpumask_fops = {
4022 .open = tracing_open_generic_tr,
4023 .read = tracing_cpumask_read,
4024 .write = tracing_cpumask_write,
4025 .release = tracing_release_generic_tr,
4026 .llseek = generic_file_llseek,
4027 };
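
/*
 * Usage sketch (editor's addition): from user space the mask is a
 * hex cpumask, e.g.
 *
 *      echo 3 > tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1, while reading the file returns
 * the current mask in the "%*pb" format used above.
 */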
4028
4029 static int tracing_trace_options_show(struct seq_file *m, void *v)
4030 {
4031 struct tracer_opt *trace_opts;
4032 struct trace_array *tr = m->private;
4033 u32 tracer_flags;
4034 int i;
4035
4036 mutex_lock(&trace_types_lock);
4037 tracer_flags = tr->current_trace->flags->val;
4038 trace_opts = tr->current_trace->flags->opts;
4039
4040 for (i = 0; trace_options[i]; i++) {
4041 if (tr->trace_flags & (1 << i))
4042 seq_printf(m, "%s\n", trace_options[i]);
4043 else
4044 seq_printf(m, "no%s\n", trace_options[i]);
4045 }
4046
4047 for (i = 0; trace_opts[i].name; i++) {
4048 if (tracer_flags & trace_opts[i].bit)
4049 seq_printf(m, "%s\n", trace_opts[i].name);
4050 else
4051 seq_printf(m, "no%s\n", trace_opts[i].name);
4052 }
4053 mutex_unlock(&trace_types_lock);
4054
4055 return 0;
4056 }
4057
4058 static int __set_tracer_option(struct trace_array *tr,
4059 struct tracer_flags *tracer_flags,
4060 struct tracer_opt *opts, int neg)
4061 {
4062 struct tracer *trace = tracer_flags->trace;
4063 int ret;
4064
4065 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4066 if (ret)
4067 return ret;
4068
4069 if (neg)
4070 tracer_flags->val &= ~opts->bit;
4071 else
4072 tracer_flags->val |= opts->bit;
4073 return 0;
4074 }
4075
4076 /* Try to assign a tracer specific option */
4077 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4078 {
4079 struct tracer *trace = tr->current_trace;
4080 struct tracer_flags *tracer_flags = trace->flags;
4081 struct tracer_opt *opts = NULL;
4082 int i;
4083
4084 for (i = 0; tracer_flags->opts[i].name; i++) {
4085 opts = &tracer_flags->opts[i];
4086
4087 if (strcmp(cmp, opts->name) == 0)
4088 return __set_tracer_option(tr, trace->flags, opts, neg);
4089 }
4090
4091 return -EINVAL;
4092 }
4093
4094 /* Some tracers require overwrite to stay enabled */
4095 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4096 {
4097 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4098 return -1;
4099
4100 return 0;
4101 }
4102
4103 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4104 {
4105 /* do nothing if flag is already set */
4106 if (!!(tr->trace_flags & mask) == !!enabled)
4107 return 0;
4108
4109 /* Give the tracer a chance to approve the change */
4110 if (tr->current_trace->flag_changed)
4111 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4112 return -EINVAL;
4113
4114 if (enabled)
4115 tr->trace_flags |= mask;
4116 else
4117 tr->trace_flags &= ~mask;
4118
4119 if (mask == TRACE_ITER_RECORD_CMD)
4120 trace_event_enable_cmd_record(enabled);
4121
4122 if (mask == TRACE_ITER_EVENT_FORK)
4123 trace_event_follow_fork(tr, enabled);
4124
4125 if (mask == TRACE_ITER_OVERWRITE) {
4126 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4127 #ifdef CONFIG_TRACER_MAX_TRACE
4128 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4129 #endif
4130 }
4131
4132 if (mask == TRACE_ITER_PRINTK) {
4133 trace_printk_start_stop_comm(enabled);
4134 trace_printk_control(enabled);
4135 }
4136
4137 return 0;
4138 }
4139
4140 static int trace_set_options(struct trace_array *tr, char *option)
4141 {
4142 char *cmp;
4143 int neg = 0;
4144 int ret = -ENODEV;
4145 int i;
4146 size_t orig_len = strlen(option);
4147
4148 cmp = strstrip(option);
4149
4150 if (strncmp(cmp, "no", 2) == 0) {
4151 neg = 1;
4152 cmp += 2;
4153 }
4154
4155 mutex_lock(&trace_types_lock);
4156
4157 for (i = 0; trace_options[i]; i++) {
4158 if (strcmp(cmp, trace_options[i]) == 0) {
4159 ret = set_tracer_flag(tr, 1 << i, !neg);
4160 break;
4161 }
4162 }
4163
4164 /* If no option could be set, test the specific tracer options */
4165 if (!trace_options[i])
4166 ret = set_tracer_option(tr, cmp, neg);
4167
4168 mutex_unlock(&trace_types_lock);
4169
4170 /*
4171 * If the first trailing whitespace is replaced with '\0' by strstrip,
4172 * turn it back into a space.
4173 */
4174 if (orig_len > strlen(option))
4175 option[strlen(option)] = ' ';
4176
4177 return ret;
4178 }
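/*
 * Illustrative example: writing "noprint-parent" to trace_options lands
 * here with option = "noprint-parent"; strstrip() and the "no" check
 * leave cmp = "print-parent" with neg = 1, and set_tracer_flag() then
 * clears TRACE_ITER_PRINT_PARENT.
 */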
4179
4180 static void __init apply_trace_boot_options(void)
4181 {
4182 char *buf = trace_boot_options_buf;
4183 char *option;
4184
4185 while (true) {
4186 option = strsep(&buf, ",");
4187
4188 if (!option)
4189 break;
4190
4191 if (*option)
4192 trace_set_options(&global_trace, option);
4193
4194 /* Put back the comma to allow this to be called again */
4195 if (buf)
4196 *(buf - 1) = ',';
4197 }
4198 }
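/*
 * Illustrative example: booting with trace_options=sym-offset,noirq-info
 * leaves "sym-offset,noirq-info" in trace_boot_options_buf, and the loop
 * above applies each comma-separated token via trace_set_options().
 */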
4199
4200 static ssize_t
4201 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4202 size_t cnt, loff_t *ppos)
4203 {
4204 struct seq_file *m = filp->private_data;
4205 struct trace_array *tr = m->private;
4206 char buf[64];
4207 int ret;
4208
4209 if (cnt >= sizeof(buf))
4210 return -EINVAL;
4211
4212 if (copy_from_user(buf, ubuf, cnt))
4213 return -EFAULT;
4214
4215 buf[cnt] = 0;
4216
4217 ret = trace_set_options(tr, buf);
4218 if (ret < 0)
4219 return ret;
4220
4221 *ppos += cnt;
4222
4223 return cnt;
4224 }
4225
4226 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4227 {
4228 struct trace_array *tr = inode->i_private;
4229 int ret;
4230
4231 if (tracing_disabled)
4232 return -ENODEV;
4233
4234 if (trace_array_get(tr) < 0)
4235 return -ENODEV;
4236
4237 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4238 if (ret < 0)
4239 trace_array_put(tr);
4240
4241 return ret;
4242 }
4243
4244 static const struct file_operations tracing_iter_fops = {
4245 .open = tracing_trace_options_open,
4246 .read = seq_read,
4247 .llseek = seq_lseek,
4248 .release = tracing_single_release_tr,
4249 .write = tracing_trace_options_write,
4250 };
4251
4252 static const char readme_msg[] =
4253 "tracing mini-HOWTO:\n\n"
4254 "# echo 0 > tracing_on : quick way to disable tracing\n"
4255 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4256 " Important files:\n"
4257 " trace\t\t\t- The static contents of the buffer\n"
4258 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4259 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4260 " current_tracer\t- function and latency tracers\n"
4261 " available_tracers\t- list of configured tracers for current_tracer\n"
4262 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4263 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4264 " trace_clock\t\t-change the clock used to order events\n"
4265 " local: Per cpu clock but may not be synced across CPUs\n"
4266 " global: Synced across CPUs but slows tracing down.\n"
4267 " counter: Not a clock, but just an increment\n"
4268 " uptime: Jiffy counter from time of boot\n"
4269 " perf: Same clock that perf events use\n"
4270 #ifdef CONFIG_X86_64
4271 " x86-tsc: TSC cycle counter\n"
4272 #endif
4273 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4274 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4275 " tracing_cpumask\t- Limit which CPUs to trace\n"
4276 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4277 "\t\t\t Remove sub-buffer with rmdir\n"
4278 " trace_options\t\t- Set format or modify how tracing happens\n"
4279 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4280 "\t\t\t option name\n"
4281 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4282 #ifdef CONFIG_DYNAMIC_FTRACE
4283 "\n available_filter_functions - list of functions that can be filtered on\n"
4284 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4285 "\t\t\t functions\n"
4286 "\t accepts: func_full_name or glob-matching-pattern\n"
4287 "\t modules: Can select a group via module\n"
4288 "\t Format: :mod:<module-name>\n"
4289 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4290 "\t triggers: a command to perform when function is hit\n"
4291 "\t Format: <function>:<trigger>[:count]\n"
4292 "\t trigger: traceon, traceoff\n"
4293 "\t\t enable_event:<system>:<event>\n"
4294 "\t\t disable_event:<system>:<event>\n"
4295 #ifdef CONFIG_STACKTRACE
4296 "\t\t stacktrace\n"
4297 #endif
4298 #ifdef CONFIG_TRACER_SNAPSHOT
4299 "\t\t snapshot\n"
4300 #endif
4301 "\t\t dump\n"
4302 "\t\t cpudump\n"
4303 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4304 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4305 "\t The first one will disable tracing every time do_fault is hit\n"
4306 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4307 "\t The first time do trap is hit and it disables tracing, the\n"
4308 "\t counter will decrement to 2. If tracing is already disabled,\n"
4309 "\t the counter will not decrement. It only decrements when the\n"
4310 "\t trigger did work\n"
4311 "\t To remove trigger without count:\n"
4312 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4313 "\t To remove trigger with a count:\n"
4314 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4315 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4316 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4317 "\t modules: Can select a group via module command :mod:\n"
4318 "\t Does not accept triggers\n"
4319 #endif /* CONFIG_DYNAMIC_FTRACE */
4320 #ifdef CONFIG_FUNCTION_TRACER
4321 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4322 "\t\t (function)\n"
4323 #endif
4324 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4325 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4326 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4327 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4328 #endif
4329 #ifdef CONFIG_TRACER_SNAPSHOT
4330 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4331 "\t\t\t snapshot buffer. Read the contents for more\n"
4332 "\t\t\t information\n"
4333 #endif
4334 #ifdef CONFIG_STACK_TRACER
4335 " stack_trace\t\t- Shows the max stack trace when active\n"
4336 " stack_max_size\t- Shows current max stack size that was traced\n"
4337 "\t\t\t Write into this file to reset the max size (trigger a\n"
4338 "\t\t\t new trace)\n"
4339 #ifdef CONFIG_DYNAMIC_FTRACE
4340 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4341 "\t\t\t traces\n"
4342 #endif
4343 #endif /* CONFIG_STACK_TRACER */
4344 #ifdef CONFIG_KPROBE_EVENT
4345 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4346 "\t\t\t Write into this file to define/undefine new trace events.\n"
4347 #endif
4348 #ifdef CONFIG_UPROBE_EVENT
4349 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4350 "\t\t\t Write into this file to define/undefine new trace events.\n"
4351 #endif
4352 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4353 "\t accepts: event-definitions (one definition per line)\n"
4354 "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4355 "\t -:[<group>/]<event>\n"
4356 #ifdef CONFIG_KPROBE_EVENT
4357 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4358 #endif
4359 #ifdef CONFIG_UPROBE_EVENT
4360 "\t place: <path>:<offset>\n"
4361 #endif
4362 "\t args: <name>=fetcharg[:type]\n"
4363 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4364 "\t $stack<index>, $stack, $retval, $comm\n"
4365 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4366 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4367 #endif
4368 " events/\t\t- Directory containing all trace event subsystems:\n"
4369 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4370 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4371 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4372 "\t\t\t events\n"
4373 " filter\t\t- If set, only events passing filter are traced\n"
4374 " events/<system>/<event>/\t- Directory containing control files for\n"
4375 "\t\t\t <event>:\n"
4376 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4377 " filter\t\t- If set, only events passing filter are traced\n"
4378 " trigger\t\t- If set, a command to perform when event is hit\n"
4379 "\t Format: <trigger>[:count][if <filter>]\n"
4380 "\t trigger: traceon, traceoff\n"
4381 "\t enable_event:<system>:<event>\n"
4382 "\t disable_event:<system>:<event>\n"
4383 #ifdef CONFIG_HIST_TRIGGERS
4384 "\t enable_hist:<system>:<event>\n"
4385 "\t disable_hist:<system>:<event>\n"
4386 #endif
4387 #ifdef CONFIG_STACKTRACE
4388 "\t\t stacktrace\n"
4389 #endif
4390 #ifdef CONFIG_TRACER_SNAPSHOT
4391 "\t\t snapshot\n"
4392 #endif
4393 #ifdef CONFIG_HIST_TRIGGERS
4394 "\t\t hist (see below)\n"
4395 #endif
4396 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4397 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4398 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4399 "\t events/block/block_unplug/trigger\n"
4400 "\t The first disables tracing every time block_unplug is hit.\n"
4401 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4402 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4403 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4404 "\t Like function triggers, the counter is only decremented if it\n"
4405 "\t enabled or disabled tracing.\n"
4406 "\t To remove a trigger without a count:\n"
4407 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4408 "\t To remove a trigger with a count:\n"
4409 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4410 "\t Filters can be ignored when removing a trigger.\n"
4411 #ifdef CONFIG_HIST_TRIGGERS
4412 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4413 "\t Format: hist:keys=<field1[,field2,...]>\n"
4414 "\t [:values=<field1[,field2,...]>]\n"
4415 "\t [:sort=<field1[,field2,...]>]\n"
4416 "\t [:size=#entries]\n"
4417 "\t [:pause][:continue][:clear]\n"
4418 "\t [:name=histname1]\n"
4419 "\t [if <filter>]\n\n"
4420 "\t When a matching event is hit, an entry is added to a hash\n"
4421 "\t table using the key(s) and value(s) named, and the value of a\n"
4422 "\t sum called 'hitcount' is incremented. Keys and values\n"
4423 "\t correspond to fields in the event's format description. Keys\n"
4424 "\t can be any field, or the special string 'stacktrace'.\n"
4425 "\t Compound keys consisting of up to two fields can be specified\n"
4426 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4427 "\t fields. Sort keys consisting of up to two fields can be\n"
4428 "\t specified using the 'sort' keyword. The sort direction can\n"
4429 "\t be modified by appending '.descending' or '.ascending' to a\n"
4430 "\t sort field. The 'size' parameter can be used to specify more\n"
4431 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4432 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4433 "\t its histogram data will be shared with other triggers of the\n"
4434 "\t same name, and trigger hits will update this common data.\n\n"
4435 "\t Reading the 'hist' file for the event will dump the hash\n"
4436 "\t table in its entirety to stdout. If there are multiple hist\n"
4437 "\t triggers attached to an event, there will be a table for each\n"
4438 "\t trigger in the output. The table displayed for a named\n"
4439 "\t trigger will be the same as any other instance having the\n"
4440 "\t same name. The default format used to display a given field\n"
4441 "\t can be modified by appending any of the following modifiers\n"
4442 "\t to the field name, as applicable:\n\n"
4443 "\t .hex display a number as a hex value\n"
4444 "\t .sym display an address as a symbol\n"
4445 "\t .sym-offset display an address as a symbol and offset\n"
4446 "\t .execname display a common_pid as a program name\n"
4447 "\t .syscall display a syscall id as a syscall name\n\n"
4448 "\t .log2 display log2 value rather than raw number\n\n"
4449 "\t The 'pause' parameter can be used to pause an existing hist\n"
4450 "\t trigger or to start a hist trigger but not log any events\n"
4451 "\t until told to do so. 'continue' can be used to start or\n"
4452 "\t restart a paused hist trigger.\n\n"
4453 "\t The 'clear' parameter will clear the contents of a running\n"
4454 "\t hist trigger and leave its current paused/active state\n"
4455 "\t unchanged.\n\n"
4456 "\t The enable_hist and disable_hist triggers can be used to\n"
4457 "\t have one event conditionally start and stop another event's\n"
4458 "\t already-attached hist trigger. The syntax is analagous to\n"
4459 "\t the enable_event and disable_event triggers.\n"
4460 #endif
4461 ;
4462
4463 static ssize_t
4464 tracing_readme_read(struct file *filp, char __user *ubuf,
4465 size_t cnt, loff_t *ppos)
4466 {
4467 return simple_read_from_buffer(ubuf, cnt, ppos,
4468 readme_msg, strlen(readme_msg));
4469 }
4470
4471 static const struct file_operations tracing_readme_fops = {
4472 .open = tracing_open_generic,
4473 .read = tracing_readme_read,
4474 .llseek = generic_file_llseek,
4475 };
4476
4477 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4478 {
4479 unsigned int *ptr = v;
4480
4481 if (*pos || m->count)
4482 ptr++;
4483
4484 (*pos)++;
4485
4486 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4487 ptr++) {
4488 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4489 continue;
4490
4491 return ptr;
4492 }
4493
4494 return NULL;
4495 }
4496
4497 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4498 {
4499 void *v;
4500 loff_t l = 0;
4501
4502 preempt_disable();
4503 arch_spin_lock(&trace_cmdline_lock);
4504
4505 v = &savedcmd->map_cmdline_to_pid[0];
4506 while (l <= *pos) {
4507 v = saved_cmdlines_next(m, v, &l);
4508 if (!v)
4509 return NULL;
4510 }
4511
4512 return v;
4513 }
4514
4515 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4516 {
4517 arch_spin_unlock(&trace_cmdline_lock);
4518 preempt_enable();
4519 }
4520
4521 static int saved_cmdlines_show(struct seq_file *m, void *v)
4522 {
4523 char buf[TASK_COMM_LEN];
4524 unsigned int *pid = v;
4525
4526 __trace_find_cmdline(*pid, buf);
4527 seq_printf(m, "%d %s\n", *pid, buf);
4528 return 0;
4529 }
4530
4531 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4532 .start = saved_cmdlines_start,
4533 .next = saved_cmdlines_next,
4534 .stop = saved_cmdlines_stop,
4535 .show = saved_cmdlines_show,
4536 };
4537
4538 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4539 {
4540 if (tracing_disabled)
4541 return -ENODEV;
4542
4543 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4544 }
4545
4546 static const struct file_operations tracing_saved_cmdlines_fops = {
4547 .open = tracing_saved_cmdlines_open,
4548 .read = seq_read,
4549 .llseek = seq_lseek,
4550 .release = seq_release,
4551 };
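/*
 * Reading saved_cmdlines yields one "<pid> <comm>" pair per line from
 * saved_cmdlines_show(), e.g. (illustrative output):
 *
 *	1 systemd
 *	57 kworker/0:1
 */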
4552
4553 static ssize_t
4554 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4555 size_t cnt, loff_t *ppos)
4556 {
4557 char buf[64];
4558 int r;
4559
4560 arch_spin_lock(&trace_cmdline_lock);
4561 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4562 arch_spin_unlock(&trace_cmdline_lock);
4563
4564 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4565 }
4566
4567 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4568 {
4569 kfree(s->saved_cmdlines);
4570 kfree(s->map_cmdline_to_pid);
4571 kfree(s);
4572 }
4573
4574 static int tracing_resize_saved_cmdlines(unsigned int val)
4575 {
4576 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4577
4578 s = kmalloc(sizeof(*s), GFP_KERNEL);
4579 if (!s)
4580 return -ENOMEM;
4581
4582 if (allocate_cmdlines_buffer(val, s) < 0) {
4583 kfree(s);
4584 return -ENOMEM;
4585 }
4586
4587 arch_spin_lock(&trace_cmdline_lock);
4588 savedcmd_temp = savedcmd;
4589 savedcmd = s;
4590 arch_spin_unlock(&trace_cmdline_lock);
4591 free_saved_cmdlines_buffer(savedcmd_temp);
4592
4593 return 0;
4594 }
4595
4596 static ssize_t
4597 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4598 size_t cnt, loff_t *ppos)
4599 {
4600 unsigned long val;
4601 int ret;
4602
4603 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4604 if (ret)
4605 return ret;
4606
4607 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4608 if (!val || val > PID_MAX_DEFAULT)
4609 return -EINVAL;
4610
4611 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4612 if (ret < 0)
4613 return ret;
4614
4615 *ppos += cnt;
4616
4617 return cnt;
4618 }
4619
4620 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4621 .open = tracing_open_generic,
4622 .read = tracing_saved_cmdlines_size_read,
4623 .write = tracing_saved_cmdlines_size_write,
4624 };
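/*
 * Illustrative usage: echo 1024 > saved_cmdlines_size allocates a new
 * cmdline cache with 1024 entries via tracing_resize_saved_cmdlines()
 * and frees the old one; reading the file back prints "1024".
 */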
4625
4626 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4627 static union trace_enum_map_item *
4628 update_enum_map(union trace_enum_map_item *ptr)
4629 {
4630 if (!ptr->map.enum_string) {
4631 if (ptr->tail.next) {
4632 ptr = ptr->tail.next;
4633 /* Set ptr to the next real item (skip head) */
4634 ptr++;
4635 } else
4636 return NULL;
4637 }
4638 return ptr;
4639 }
4640
4641 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4642 {
4643 union trace_enum_map_item *ptr = v;
4644
4645 /*
4646 * Paranoid! If ptr points to end, we don't want to increment past it.
4647 * This really should never happen.
4648 */
4649 ptr = update_enum_map(ptr);
4650 if (WARN_ON_ONCE(!ptr))
4651 return NULL;
4652
4653 ptr++;
4654
4655 (*pos)++;
4656
4657 ptr = update_enum_map(ptr);
4658
4659 return ptr;
4660 }
4661
4662 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4663 {
4664 union trace_enum_map_item *v;
4665 loff_t l = 0;
4666
4667 mutex_lock(&trace_enum_mutex);
4668
4669 v = trace_enum_maps;
4670 if (v)
4671 v++;
4672
4673 while (v && l < *pos) {
4674 v = enum_map_next(m, v, &l);
4675 }
4676
4677 return v;
4678 }
4679
4680 static void enum_map_stop(struct seq_file *m, void *v)
4681 {
4682 mutex_unlock(&trace_enum_mutex);
4683 }
4684
4685 static int enum_map_show(struct seq_file *m, void *v)
4686 {
4687 union trace_enum_map_item *ptr = v;
4688
4689 seq_printf(m, "%s %ld (%s)\n",
4690 ptr->map.enum_string, ptr->map.enum_value,
4691 ptr->map.system);
4692
4693 return 0;
4694 }
4695
4696 static const struct seq_operations tracing_enum_map_seq_ops = {
4697 .start = enum_map_start,
4698 .next = enum_map_next,
4699 .stop = enum_map_stop,
4700 .show = enum_map_show,
4701 };
4702
4703 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4704 {
4705 if (tracing_disabled)
4706 return -ENODEV;
4707
4708 return seq_open(filp, &tracing_enum_map_seq_ops);
4709 }
4710
4711 static const struct file_operations tracing_enum_map_fops = {
4712 .open = tracing_enum_map_open,
4713 .read = seq_read,
4714 .llseek = seq_lseek,
4715 .release = seq_release,
4716 };
4717
4718 static inline union trace_enum_map_item *
4719 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4720 {
4721 /* Return tail of array given the head */
4722 return ptr + ptr->head.length + 1;
4723 }
4724
4725 static void
4726 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4727 int len)
4728 {
4729 struct trace_enum_map **stop;
4730 struct trace_enum_map **map;
4731 union trace_enum_map_item *map_array;
4732 union trace_enum_map_item *ptr;
4733
4734 stop = start + len;
4735
4736 /*
4737 * The trace_enum_maps array contains the maps plus a head and tail item,
4738 * where the head holds the module and the length of the array, and the
4739 * tail holds a pointer to the next list.
4740 */
4741 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4742 if (!map_array) {
4743 pr_warn("Unable to allocate trace enum mapping\n");
4744 return;
4745 }
4746
4747 mutex_lock(&trace_enum_mutex);
4748
4749 if (!trace_enum_maps)
4750 trace_enum_maps = map_array;
4751 else {
4752 ptr = trace_enum_maps;
4753 for (;;) {
4754 ptr = trace_enum_jmp_to_tail(ptr);
4755 if (!ptr->tail.next)
4756 break;
4757 ptr = ptr->tail.next;
4758
4759 }
4760 ptr->tail.next = map_array;
4761 }
4762 map_array->head.mod = mod;
4763 map_array->head.length = len;
4764 map_array++;
4765
4766 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4767 map_array->map = **map;
4768 map_array++;
4769 }
4770 memset(map_array, 0, sizeof(*map_array));
4771
4772 mutex_unlock(&trace_enum_mutex);
4773 }
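/*
 * Resulting layout for len == 2 (illustrative):
 *
 *	map_array[0]	head { .mod, .length = 2 }
 *	map_array[1]	map  { enum_string, enum_value, system }
 *	map_array[2]	map  { enum_string, enum_value, system }
 *	map_array[3]	tail (zeroed; .next later chains another array)
 */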
4774
4775 static void trace_create_enum_file(struct dentry *d_tracer)
4776 {
4777 trace_create_file("enum_map", 0444, d_tracer,
4778 NULL, &tracing_enum_map_fops);
4779 }
4780
4781 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4782 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4783 static inline void trace_insert_enum_map_file(struct module *mod,
4784 struct trace_enum_map **start, int len) { }
4785 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4786
4787 static void trace_insert_enum_map(struct module *mod,
4788 struct trace_enum_map **start, int len)
4789 {
4790 struct trace_enum_map **map;
4791
4792 if (len <= 0)
4793 return;
4794
4795 map = start;
4796
4797 trace_event_enum_update(map, len);
4798
4799 trace_insert_enum_map_file(mod, start, len);
4800 }
4801
4802 static ssize_t
4803 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4804 size_t cnt, loff_t *ppos)
4805 {
4806 struct trace_array *tr = filp->private_data;
4807 char buf[MAX_TRACER_SIZE+2];
4808 int r;
4809
4810 mutex_lock(&trace_types_lock);
4811 r = sprintf(buf, "%s\n", tr->current_trace->name);
4812 mutex_unlock(&trace_types_lock);
4813
4814 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4815 }
4816
4817 int tracer_init(struct tracer *t, struct trace_array *tr)
4818 {
4819 tracing_reset_online_cpus(&tr->trace_buffer);
4820 return t->init(tr);
4821 }
4822
4823 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4824 {
4825 int cpu;
4826
4827 for_each_tracing_cpu(cpu)
4828 per_cpu_ptr(buf->data, cpu)->entries = val;
4829 }
4830
4831 #ifdef CONFIG_TRACER_MAX_TRACE
4832 /* resize @tr's buffer to the size of @size_tr's entries */
4833 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4834 struct trace_buffer *size_buf, int cpu_id)
4835 {
4836 int cpu, ret = 0;
4837
4838 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4839 for_each_tracing_cpu(cpu) {
4840 ret = ring_buffer_resize(trace_buf->buffer,
4841 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4842 if (ret < 0)
4843 break;
4844 per_cpu_ptr(trace_buf->data, cpu)->entries =
4845 per_cpu_ptr(size_buf->data, cpu)->entries;
4846 }
4847 } else {
4848 ret = ring_buffer_resize(trace_buf->buffer,
4849 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4850 if (ret == 0)
4851 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4852 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4853 }
4854
4855 return ret;
4856 }
4857 #endif /* CONFIG_TRACER_MAX_TRACE */
4858
4859 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4860 unsigned long size, int cpu)
4861 {
4862 int ret;
4863
4864 /*
4865 * If kernel or user changes the size of the ring buffer
4866 * we use the size that was given, and we can forget about
4867 * expanding it later.
4868 */
4869 ring_buffer_expanded = true;
4870
4871 /* May be called before buffers are initialized */
4872 if (!tr->trace_buffer.buffer)
4873 return 0;
4874
4875 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4876 if (ret < 0)
4877 return ret;
4878
4879 #ifdef CONFIG_TRACER_MAX_TRACE
4880 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4881 !tr->current_trace->use_max_tr)
4882 goto out;
4883
4884 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4885 if (ret < 0) {
4886 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4887 &tr->trace_buffer, cpu);
4888 if (r < 0) {
4889 /*
4890 * AARGH! We are left with different
4891 * size max buffer!!!!
4892 * The max buffer is our "snapshot" buffer.
4893 * When a tracer needs a snapshot (one of the
4894 * latency tracers), it swaps the max buffer
4895 * with the saved snapshot. We succeeded in
4896 * updating the size of the main buffer, but failed to
4897 * update the size of the max buffer. But when we tried
4898 * to reset the main buffer to the original size, we
4899 * failed there too. This is very unlikely to
4900 * happen, but if it does, warn and kill all
4901 * tracing.
4902 */
4903 WARN_ON(1);
4904 tracing_disabled = 1;
4905 }
4906 return ret;
4907 }
4908
4909 if (cpu == RING_BUFFER_ALL_CPUS)
4910 set_buffer_entries(&tr->max_buffer, size);
4911 else
4912 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4913
4914 out:
4915 #endif /* CONFIG_TRACER_MAX_TRACE */
4916
4917 if (cpu == RING_BUFFER_ALL_CPUS)
4918 set_buffer_entries(&tr->trace_buffer, size);
4919 else
4920 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4921
4922 return ret;
4923 }
4924
4925 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4926 unsigned long size, int cpu_id)
4927 {
4928 int ret = size;
4929
4930 mutex_lock(&trace_types_lock);
4931
4932 if (cpu_id != RING_BUFFER_ALL_CPUS) {
4933 /* make sure this cpu is enabled in the mask */
4934 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4935 ret = -EINVAL;
4936 goto out;
4937 }
4938 }
4939
4940 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4941 if (ret < 0)
4942 ret = -ENOMEM;
4943
4944 out:
4945 mutex_unlock(&trace_types_lock);
4946
4947 return ret;
4948 }
4949
4950
4951 /**
4952 * tracing_update_buffers - used by tracing facility to expand ring buffers
4953 *
4954 * To save memory when tracing is never used on a system that has it
4955 * configured in, the ring buffers are set to a minimum size. Once
4956 * a user starts to use the tracing facility, they need to grow
4957 * to their default size.
4958 *
4959 * This function is to be called when a tracer is about to be used.
4960 */
4961 int tracing_update_buffers(void)
4962 {
4963 int ret = 0;
4964
4965 mutex_lock(&trace_types_lock);
4966 if (!ring_buffer_expanded)
4967 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4968 RING_BUFFER_ALL_CPUS);
4969 mutex_unlock(&trace_types_lock);
4970
4971 return ret;
4972 }
4973
4974 struct trace_option_dentry;
4975
4976 static void
4977 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4978
4979 /*
4980 * Used to clear out the tracer before deletion of an instance.
4981 * Must have trace_types_lock held.
4982 */
4983 static void tracing_set_nop(struct trace_array *tr)
4984 {
4985 if (tr->current_trace == &nop_trace)
4986 return;
4987
4988 tr->current_trace->enabled--;
4989
4990 if (tr->current_trace->reset)
4991 tr->current_trace->reset(tr);
4992
4993 tr->current_trace = &nop_trace;
4994 }
4995
4996 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4997 {
4998 /* Only enable if the directory has been created already. */
4999 if (!tr->dir)
5000 return;
5001
5002 create_trace_option_files(tr, t);
5003 }
5004
5005 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5006 {
5007 struct tracer *t;
5008 #ifdef CONFIG_TRACER_MAX_TRACE
5009 bool had_max_tr;
5010 #endif
5011 int ret = 0;
5012
5013 mutex_lock(&trace_types_lock);
5014
5015 if (!ring_buffer_expanded) {
5016 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5017 RING_BUFFER_ALL_CPUS);
5018 if (ret < 0)
5019 goto out;
5020 ret = 0;
5021 }
5022
5023 for (t = trace_types; t; t = t->next) {
5024 if (strcmp(t->name, buf) == 0)
5025 break;
5026 }
5027 if (!t) {
5028 ret = -EINVAL;
5029 goto out;
5030 }
5031 if (t == tr->current_trace)
5032 goto out;
5033
5034 /* Some tracers are only allowed for the top level buffer */
5035 if (!trace_ok_for_array(t, tr)) {
5036 ret = -EINVAL;
5037 goto out;
5038 }
5039
5040 /* If trace pipe files are being read, we can't change the tracer */
5041 if (tr->current_trace->ref) {
5042 ret = -EBUSY;
5043 goto out;
5044 }
5045
5046 trace_branch_disable();
5047
5048 tr->current_trace->enabled--;
5049
5050 if (tr->current_trace->reset)
5051 tr->current_trace->reset(tr);
5052
5053 /* Current trace needs to be nop_trace before synchronize_sched */
5054 tr->current_trace = &nop_trace;
5055
5056 #ifdef CONFIG_TRACER_MAX_TRACE
5057 had_max_tr = tr->allocated_snapshot;
5058
5059 if (had_max_tr && !t->use_max_tr) {
5060 /*
5061 * We need to make sure that the update_max_tr sees that
5062 * current_trace changed to nop_trace to keep it from
5063 * swapping the buffers after we resize it.
5064 * update_max_tr() is called with interrupts disabled,
5065 * so a synchronize_sched() is sufficient.
5066 */
5067 synchronize_sched();
5068 free_snapshot(tr);
5069 }
5070 #endif
5071
5072 #ifdef CONFIG_TRACER_MAX_TRACE
5073 if (t->use_max_tr && !had_max_tr) {
5074 ret = alloc_snapshot(tr);
5075 if (ret < 0)
5076 goto out;
5077 }
5078 #endif
5079
5080 if (t->init) {
5081 ret = tracer_init(t, tr);
5082 if (ret)
5083 goto out;
5084 }
5085
5086 tr->current_trace = t;
5087 tr->current_trace->enabled++;
5088 trace_branch_enable(tr);
5089 out:
5090 mutex_unlock(&trace_types_lock);
5091
5092 return ret;
5093 }
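/*
 * Illustrative example: echo function_graph > current_tracer arrives here
 * via tracing_set_trace_write(); the old tracer is reset, nop_trace stands
 * in across the switch, a snapshot buffer is allocated if the new tracer
 * sets ->use_max_tr, and finally the new tracer's ->init() runs.
 */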
5094
5095 static ssize_t
5096 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5097 size_t cnt, loff_t *ppos)
5098 {
5099 struct trace_array *tr = filp->private_data;
5100 char buf[MAX_TRACER_SIZE+1];
5101 int i;
5102 size_t ret;
5103 int err;
5104
5105 ret = cnt;
5106
5107 if (cnt > MAX_TRACER_SIZE)
5108 cnt = MAX_TRACER_SIZE;
5109
5110 if (copy_from_user(buf, ubuf, cnt))
5111 return -EFAULT;
5112
5113 buf[cnt] = 0;
5114
5115 /* strip ending whitespace. */
5116 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5117 buf[i] = 0;
5118
5119 err = tracing_set_tracer(tr, buf);
5120 if (err)
5121 return err;
5122
5123 *ppos += ret;
5124
5125 return ret;
5126 }
5127
5128 static ssize_t
5129 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5130 size_t cnt, loff_t *ppos)
5131 {
5132 char buf[64];
5133 int r;
5134
5135 r = snprintf(buf, sizeof(buf), "%ld\n",
5136 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5137 if (r > sizeof(buf))
5138 r = sizeof(buf);
5139 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5140 }
5141
5142 static ssize_t
5143 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5144 size_t cnt, loff_t *ppos)
5145 {
5146 unsigned long val;
5147 int ret;
5148
5149 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5150 if (ret)
5151 return ret;
5152
5153 *ptr = val * 1000;
5154
5155 return cnt;
5156 }
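/*
 * The files built on these helpers are in microseconds while the stored
 * value is in nanoseconds; e.g. (illustrative) echo 100 > tracing_thresh
 * stores 100000, and the read side converts back with nsecs_to_usecs().
 */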
5157
5158 static ssize_t
5159 tracing_thresh_read(struct file *filp, char __user *ubuf,
5160 size_t cnt, loff_t *ppos)
5161 {
5162 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5163 }
5164
5165 static ssize_t
5166 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5167 size_t cnt, loff_t *ppos)
5168 {
5169 struct trace_array *tr = filp->private_data;
5170 int ret;
5171
5172 mutex_lock(&trace_types_lock);
5173 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5174 if (ret < 0)
5175 goto out;
5176
5177 if (tr->current_trace->update_thresh) {
5178 ret = tr->current_trace->update_thresh(tr);
5179 if (ret < 0)
5180 goto out;
5181 }
5182
5183 ret = cnt;
5184 out:
5185 mutex_unlock(&trace_types_lock);
5186
5187 return ret;
5188 }
5189
5190 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5191
5192 static ssize_t
5193 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5194 size_t cnt, loff_t *ppos)
5195 {
5196 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5197 }
5198
5199 static ssize_t
5200 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5201 size_t cnt, loff_t *ppos)
5202 {
5203 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5204 }
5205
5206 #endif
5207
5208 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5209 {
5210 struct trace_array *tr = inode->i_private;
5211 struct trace_iterator *iter;
5212 int ret = 0;
5213
5214 if (tracing_disabled)
5215 return -ENODEV;
5216
5217 if (trace_array_get(tr) < 0)
5218 return -ENODEV;
5219
5220 mutex_lock(&trace_types_lock);
5221
5222 /* create a buffer to store the information to pass to userspace */
5223 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5224 if (!iter) {
5225 ret = -ENOMEM;
5226 __trace_array_put(tr);
5227 goto out;
5228 }
5229
5230 trace_seq_init(&iter->seq);
5231 iter->trace = tr->current_trace;
5232
5233 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5234 ret = -ENOMEM;
5235 goto fail;
5236 }
5237
5238 /* trace pipe does not show start of buffer */
5239 cpumask_setall(iter->started);
5240
5241 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5242 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5243
5244 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5245 if (trace_clocks[tr->clock_id].in_ns)
5246 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5247
5248 iter->tr = tr;
5249 iter->trace_buffer = &tr->trace_buffer;
5250 iter->cpu_file = tracing_get_cpu(inode);
5251 mutex_init(&iter->mutex);
5252 filp->private_data = iter;
5253
5254 if (iter->trace->pipe_open)
5255 iter->trace->pipe_open(iter);
5256
5257 nonseekable_open(inode, filp);
5258
5259 tr->current_trace->ref++;
5260 out:
5261 mutex_unlock(&trace_types_lock);
5262 return ret;
5263
5264 fail:
5265 kfree(iter->trace);
5266 kfree(iter);
5267 __trace_array_put(tr);
5268 mutex_unlock(&trace_types_lock);
5269 return ret;
5270 }
5271
5272 static int tracing_release_pipe(struct inode *inode, struct file *file)
5273 {
5274 struct trace_iterator *iter = file->private_data;
5275 struct trace_array *tr = inode->i_private;
5276
5277 mutex_lock(&trace_types_lock);
5278
5279 tr->current_trace->ref--;
5280
5281 if (iter->trace->pipe_close)
5282 iter->trace->pipe_close(iter);
5283
5284 mutex_unlock(&trace_types_lock);
5285
5286 free_cpumask_var(iter->started);
5287 mutex_destroy(&iter->mutex);
5288 kfree(iter);
5289
5290 trace_array_put(tr);
5291
5292 return 0;
5293 }
5294
5295 static unsigned int
5296 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5297 {
5298 struct trace_array *tr = iter->tr;
5299
5300 /* Iterators are static, they should be filled or empty */
5301 if (trace_buffer_iter(iter, iter->cpu_file))
5302 return POLLIN | POLLRDNORM;
5303
5304 if (tr->trace_flags & TRACE_ITER_BLOCK)
5305 /*
5306 * Always select as readable when in blocking mode
5307 */
5308 return POLLIN | POLLRDNORM;
5309 else
5310 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5311 filp, poll_table);
5312 }
5313
5314 static unsigned int
5315 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5316 {
5317 struct trace_iterator *iter = filp->private_data;
5318
5319 return trace_poll(iter, filp, poll_table);
5320 }
5321
5322 /* Must be called with iter->mutex held. */
5323 static int tracing_wait_pipe(struct file *filp)
5324 {
5325 struct trace_iterator *iter = filp->private_data;
5326 int ret;
5327
5328 while (trace_empty(iter)) {
5329
5330 if ((filp->f_flags & O_NONBLOCK)) {
5331 return -EAGAIN;
5332 }
5333
5334 /*
5335 * We block until we read something, unless tracing is disabled
5336 * and we have already read something. We still block when
5337 * tracing is disabled but nothing has been read; this lets a
5338 * user cat this file and then enable tracing. After something
5339 * has been read, we return EOF when tracing is disabled again.
5340 *
5341 * iter->pos will be 0 if we haven't read anything.
5342 */
5343 if (!tracing_is_on() && iter->pos)
5344 break;
5345
5346 mutex_unlock(&iter->mutex);
5347
5348 ret = wait_on_pipe(iter, false);
5349
5350 mutex_lock(&iter->mutex);
5351
5352 if (ret)
5353 return ret;
5354 }
5355
5356 return 1;
5357 }
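/*
 * This is why (illustrative) "cat trace_pipe" started on an empty buffer
 * simply sleeps in wait_on_pipe() instead of hitting EOF; the user can
 * enable tracing afterwards and the reader wakes up as events arrive.
 */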
5358
5359 /*
5360 * Consumer reader.
5361 */
5362 static ssize_t
5363 tracing_read_pipe(struct file *filp, char __user *ubuf,
5364 size_t cnt, loff_t *ppos)
5365 {
5366 struct trace_iterator *iter = filp->private_data;
5367 ssize_t sret;
5368
5369 /*
5370 * Avoid more than one consumer on a single file descriptor.
5371 * This is just a matter of trace coherency: the ring buffer itself
5372 * is protected.
5373 */
5374 mutex_lock(&iter->mutex);
5375
5376 /* return any leftover data */
5377 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5378 if (sret != -EBUSY)
5379 goto out;
5380
5381 trace_seq_init(&iter->seq);
5382
5383 if (iter->trace->read) {
5384 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5385 if (sret)
5386 goto out;
5387 }
5388
5389 waitagain:
5390 sret = tracing_wait_pipe(filp);
5391 if (sret <= 0)
5392 goto out;
5393
5394 /* stop when tracing is finished */
5395 if (trace_empty(iter)) {
5396 sret = 0;
5397 goto out;
5398 }
5399
5400 if (cnt >= PAGE_SIZE)
5401 cnt = PAGE_SIZE - 1;
5402
5403 /* reset all but tr, trace, and overruns */
5404 memset(&iter->seq, 0,
5405 sizeof(struct trace_iterator) -
5406 offsetof(struct trace_iterator, seq));
5407 cpumask_clear(iter->started);
5408 iter->pos = -1;
5409
5410 trace_event_read_lock();
5411 trace_access_lock(iter->cpu_file);
5412 while (trace_find_next_entry_inc(iter) != NULL) {
5413 enum print_line_t ret;
5414 int save_len = iter->seq.seq.len;
5415
5416 ret = print_trace_line(iter);
5417 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5418 /* don't print partial lines */
5419 iter->seq.seq.len = save_len;
5420 break;
5421 }
5422 if (ret != TRACE_TYPE_NO_CONSUME)
5423 trace_consume(iter);
5424
5425 if (trace_seq_used(&iter->seq) >= cnt)
5426 break;
5427
5428 /*
5429 * Setting the full flag means we reached the trace_seq buffer
5430 * size and should have bailed out via the partial-line check above.
5431 * One of the trace_seq_* functions is not used properly.
5432 */
5433 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5434 iter->ent->type);
5435 }
5436 trace_access_unlock(iter->cpu_file);
5437 trace_event_read_unlock();
5438
5439 /* Now copy what we have to the user */
5440 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5441 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5442 trace_seq_init(&iter->seq);
5443
5444 /*
5445 * If there was nothing to send to user, in spite of consuming trace
5446 * entries, go back to wait for more entries.
5447 */
5448 if (sret == -EBUSY)
5449 goto waitagain;
5450
5451 out:
5452 mutex_unlock(&iter->mutex);
5453
5454 return sret;
5455 }
5456
5457 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5458 unsigned int idx)
5459 {
5460 __free_page(spd->pages[idx]);
5461 }
5462
5463 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5464 .can_merge = 0,
5465 .confirm = generic_pipe_buf_confirm,
5466 .release = generic_pipe_buf_release,
5467 .steal = generic_pipe_buf_steal,
5468 .get = generic_pipe_buf_get,
5469 };
5470
5471 static size_t
5472 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5473 {
5474 size_t count;
5475 int save_len;
5476 int ret;
5477
5478 /* Seq buffer is page-sized, exactly what we need. */
5479 for (;;) {
5480 save_len = iter->seq.seq.len;
5481 ret = print_trace_line(iter);
5482
5483 if (trace_seq_has_overflowed(&iter->seq)) {
5484 iter->seq.seq.len = save_len;
5485 break;
5486 }
5487
5488 /*
5489 * This should not be hit, because it should only
5490 * be set if the iter->seq overflowed. But check it
5491 * anyway to be safe.
5492 */
5493 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5494 iter->seq.seq.len = save_len;
5495 break;
5496 }
5497
5498 count = trace_seq_used(&iter->seq) - save_len;
5499 if (rem < count) {
5500 rem = 0;
5501 iter->seq.seq.len = save_len;
5502 break;
5503 }
5504
5505 if (ret != TRACE_TYPE_NO_CONSUME)
5506 trace_consume(iter);
5507 rem -= count;
5508 if (!trace_find_next_entry_inc(iter)) {
5509 rem = 0;
5510 iter->ent = NULL;
5511 break;
5512 }
5513 }
5514
5515 return rem;
5516 }
5517
5518 static ssize_t tracing_splice_read_pipe(struct file *filp,
5519 loff_t *ppos,
5520 struct pipe_inode_info *pipe,
5521 size_t len,
5522 unsigned int flags)
5523 {
5524 struct page *pages_def[PIPE_DEF_BUFFERS];
5525 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5526 struct trace_iterator *iter = filp->private_data;
5527 struct splice_pipe_desc spd = {
5528 .pages = pages_def,
5529 .partial = partial_def,
5530 .nr_pages = 0, /* This gets updated below. */
5531 .nr_pages_max = PIPE_DEF_BUFFERS,
5532 .flags = flags,
5533 .ops = &tracing_pipe_buf_ops,
5534 .spd_release = tracing_spd_release_pipe,
5535 };
5536 ssize_t ret;
5537 size_t rem;
5538 unsigned int i;
5539
5540 if (splice_grow_spd(pipe, &spd))
5541 return -ENOMEM;
5542
5543 mutex_lock(&iter->mutex);
5544
5545 if (iter->trace->splice_read) {
5546 ret = iter->trace->splice_read(iter, filp,
5547 ppos, pipe, len, flags);
5548 if (ret)
5549 goto out_err;
5550 }
5551
5552 ret = tracing_wait_pipe(filp);
5553 if (ret <= 0)
5554 goto out_err;
5555
5556 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5557 ret = -EFAULT;
5558 goto out_err;
5559 }
5560
5561 trace_event_read_lock();
5562 trace_access_lock(iter->cpu_file);
5563
5564 /* Fill as many pages as possible. */
5565 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5566 spd.pages[i] = alloc_page(GFP_KERNEL);
5567 if (!spd.pages[i])
5568 break;
5569
5570 rem = tracing_fill_pipe_page(rem, iter);
5571
5572 /* Copy the data into the page, so we can start over. */
5573 ret = trace_seq_to_buffer(&iter->seq,
5574 page_address(spd.pages[i]),
5575 trace_seq_used(&iter->seq));
5576 if (ret < 0) {
5577 __free_page(spd.pages[i]);
5578 break;
5579 }
5580 spd.partial[i].offset = 0;
5581 spd.partial[i].len = trace_seq_used(&iter->seq);
5582
5583 trace_seq_init(&iter->seq);
5584 }
5585
5586 trace_access_unlock(iter->cpu_file);
5587 trace_event_read_unlock();
5588 mutex_unlock(&iter->mutex);
5589
5590 spd.nr_pages = i;
5591
5592 if (i)
5593 ret = splice_to_pipe(pipe, &spd);
5594 else
5595 ret = 0;
5596 out:
5597 splice_shrink_spd(&spd);
5598 return ret;
5599
5600 out_err:
5601 mutex_unlock(&iter->mutex);
5602 goto out;
5603 }
5604
5605 static ssize_t
5606 tracing_entries_read(struct file *filp, char __user *ubuf,
5607 size_t cnt, loff_t *ppos)
5608 {
5609 struct inode *inode = file_inode(filp);
5610 struct trace_array *tr = inode->i_private;
5611 int cpu = tracing_get_cpu(inode);
5612 char buf[64];
5613 int r = 0;
5614 ssize_t ret;
5615
5616 mutex_lock(&trace_types_lock);
5617
5618 if (cpu == RING_BUFFER_ALL_CPUS) {
5619 int cpu, buf_size_same;
5620 unsigned long size;
5621
5622 size = 0;
5623 buf_size_same = 1;
5624 /* check if all cpu sizes are the same */
5625 for_each_tracing_cpu(cpu) {
5626 /* fill in the size from first enabled cpu */
5627 if (size == 0)
5628 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5629 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5630 buf_size_same = 0;
5631 break;
5632 }
5633 }
5634
5635 if (buf_size_same) {
5636 if (!ring_buffer_expanded)
5637 r = sprintf(buf, "%lu (expanded: %lu)\n",
5638 size >> 10,
5639 trace_buf_size >> 10);
5640 else
5641 r = sprintf(buf, "%lu\n", size >> 10);
5642 } else
5643 r = sprintf(buf, "X\n");
5644 } else
5645 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5646
5647 mutex_unlock(&trace_types_lock);
5648
5649 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5650 return ret;
5651 }
5652
5653 static ssize_t
5654 tracing_entries_write(struct file *filp, const char __user *ubuf,
5655 size_t cnt, loff_t *ppos)
5656 {
5657 struct inode *inode = file_inode(filp);
5658 struct trace_array *tr = inode->i_private;
5659 unsigned long val;
5660 int ret;
5661
5662 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5663 if (ret)
5664 return ret;
5665
5666 /* must have at least 1 entry */
5667 if (!val)
5668 return -EINVAL;
5669
5670 /* value is in KB */
5671 val <<= 10;
5672 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5673 if (ret < 0)
5674 return ret;
5675
5676 *ppos += cnt;
5677
5678 return cnt;
5679 }
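/*
 * Illustrative usage: echo 2048 > buffer_size_kb resizes every per-cpu
 * buffer to 2048 KiB, while echo 512 > per_cpu/cpu1/buffer_size_kb
 * resizes only CPU 1 (tracing_get_cpu() picks the target).
 */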
5680
5681 static ssize_t
5682 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5683 size_t cnt, loff_t *ppos)
5684 {
5685 struct trace_array *tr = filp->private_data;
5686 char buf[64];
5687 int r, cpu;
5688 unsigned long size = 0, expanded_size = 0;
5689
5690 mutex_lock(&trace_types_lock);
5691 for_each_tracing_cpu(cpu) {
5692 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5693 if (!ring_buffer_expanded)
5694 expanded_size += trace_buf_size >> 10;
5695 }
5696 if (ring_buffer_expanded)
5697 r = sprintf(buf, "%lu\n", size);
5698 else
5699 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5700 mutex_unlock(&trace_types_lock);
5701
5702 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5703 }
5704
5705 static ssize_t
5706 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5707 size_t cnt, loff_t *ppos)
5708 {
5709 /*
5710 * There is no need to read what the user has written; this function
5711 * exists just so that "echo" does not report an error.
5712 */
5713
5714 *ppos += cnt;
5715
5716 return cnt;
5717 }
5718
5719 static int
5720 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5721 {
5722 struct trace_array *tr = inode->i_private;
5723
5724 /* disable tracing ? */
5725 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5726 tracer_tracing_off(tr);
5727 /* resize the ring buffer to 0 */
5728 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5729
5730 trace_array_put(tr);
5731
5732 return 0;
5733 }
5734
5735 static ssize_t
5736 tracing_mark_write(struct file *filp, const char __user *ubuf,
5737 size_t cnt, loff_t *fpos)
5738 {
5739 struct trace_array *tr = filp->private_data;
5740 struct ring_buffer_event *event;
5741 struct ring_buffer *buffer;
5742 struct print_entry *entry;
5743 unsigned long irq_flags;
5744 const char faulted[] = "<faulted>";
5745 ssize_t written;
5746 int size;
5747 int len;
5748
5749 /* Used in tracing_mark_raw_write() as well */
5750 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5751
5752 if (tracing_disabled)
5753 return -EINVAL;
5754
5755 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5756 return -EINVAL;
5757
5758 if (cnt > TRACE_BUF_SIZE)
5759 cnt = TRACE_BUF_SIZE;
5760
5761 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5762
5763 local_save_flags(irq_flags);
5764 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5765
5766 /* If less than "<faulted>", then make sure we can still add that */
5767 if (cnt < FAULTED_SIZE)
5768 size += FAULTED_SIZE - cnt;
5769
5770 buffer = tr->trace_buffer.buffer;
5771 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5772 irq_flags, preempt_count());
5773 if (unlikely(!event))
5774 /* Ring buffer disabled, return as if not open for write */
5775 return -EBADF;
5776
5777 entry = ring_buffer_event_data(event);
5778 entry->ip = _THIS_IP_;
5779
5780 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5781 if (len) {
5782 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5783 cnt = FAULTED_SIZE;
5784 written = -EFAULT;
5785 } else
5786 written = cnt;
5787 len = cnt;
5788
5789 if (entry->buf[cnt - 1] != '\n') {
5790 entry->buf[cnt] = '\n';
5791 entry->buf[cnt + 1] = '\0';
5792 } else
5793 entry->buf[cnt] = '\0';
5794
5795 __buffer_unlock_commit(buffer, event);
5796
5797 if (written > 0)
5798 *fpos += written;
5799
5800 return written;
5801 }
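/*
 * A minimal user-space sketch of feeding trace_marker (illustrative;
 * the tracefs mount point is an assumption):
 */
#if 0	/* user-space example, not kernel code */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello from user-space";
	int fd;

	fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
	if (fd < 0)
		return 1;
	/* each write() becomes one TRACE_PRINT entry in the ring buffer */
	if (write(fd, msg, sizeof(msg) - 1) < 0) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
#endif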
5802
5803 /* Limit it for now to 3K (including tag) */
5804 #define RAW_DATA_MAX_SIZE (1024*3)
5805
5806 static ssize_t
5807 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5808 size_t cnt, loff_t *fpos)
5809 {
5810 struct trace_array *tr = filp->private_data;
5811 struct ring_buffer_event *event;
5812 struct ring_buffer *buffer;
5813 struct raw_data_entry *entry;
5814 const char faulted[] = "<faulted>";
5815 unsigned long irq_flags;
5816 ssize_t written;
5817 int size;
5818 int len;
5819
5820 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5821
5822 if (tracing_disabled)
5823 return -EINVAL;
5824
5825 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5826 return -EINVAL;
5827
5828 /* The marker must at least have a tag id */
5829 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5830 return -EINVAL;
5831
5832 if (cnt > TRACE_BUF_SIZE)
5833 cnt = TRACE_BUF_SIZE;
5834
5835 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5836
5837 local_save_flags(irq_flags);
5838 size = sizeof(*entry) + cnt;
5839 if (cnt < FAULT_SIZE_ID)
5840 size += FAULT_SIZE_ID - cnt;
5841
5842 buffer = tr->trace_buffer.buffer;
5843 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5844 irq_flags, preempt_count());
5845 if (!event)
5846 /* Ring buffer disabled, return as if not open for write */
5847 return -EBADF;
5848
5849 entry = ring_buffer_event_data(event);
5850
5851 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5852 if (len) {
5853 entry->id = -1;
5854 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5855 written = -EFAULT;
5856 } else
5857 written = cnt;
5858
5859 __buffer_unlock_commit(buffer, event);
5860
5861 if (written > 0)
5862 *fpos += written;
5863
5864 return written;
5865 }
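/*
 * The same from user-space for the raw interface (illustrative sketch;
 * the tag id 42 is hypothetical): the first 4 bytes are the tag id, the
 * rest is opaque payload, mirroring struct raw_data_entry above.
 */
#if 0	/* user-space example, not kernel code */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[sizeof(unsigned int) + 8];
	unsigned int id = 42;	/* hypothetical tag id */
	int fd;

	memcpy(buf, &id, sizeof(id));
	memset(buf + sizeof(id), 0xab, 8);	/* opaque payload */

	fd = open("/sys/kernel/debug/tracing/trace_marker_raw", O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, buf, sizeof(buf)) < 0) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
#endif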
5866
5867 static int tracing_clock_show(struct seq_file *m, void *v)
5868 {
5869 struct trace_array *tr = m->private;
5870 int i;
5871
5872 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5873 seq_printf(m,
5874 "%s%s%s%s", i ? " " : "",
5875 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5876 i == tr->clock_id ? "]" : "");
5877 seq_putc(m, '\n');
5878
5879 return 0;
5880 }
5881
5882 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5883 {
5884 int i;
5885
5886 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5887 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5888 break;
5889 }
5890 if (i == ARRAY_SIZE(trace_clocks))
5891 return -EINVAL;
5892
5893 mutex_lock(&trace_types_lock);
5894
5895 tr->clock_id = i;
5896
5897 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5898
5899 /*
5900 * The new clock may not be consistent with the previous clock.
5901 * Reset the buffer so that it doesn't have incomparable timestamps.
5902 */
5903 tracing_reset_online_cpus(&tr->trace_buffer);
5904
5905 #ifdef CONFIG_TRACER_MAX_TRACE
5906 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5907 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5908 tracing_reset_online_cpus(&tr->max_buffer);
5909 #endif
5910
5911 mutex_unlock(&trace_types_lock);
5912
5913 return 0;
5914 }
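/*
 * Illustrative usage: reading trace_clock brackets the current clock,
 * e.g. "[local] global counter uptime perf ..."; echo global >
 * trace_clock switches it and resets the buffers, since timestamps
 * taken with different clocks are not comparable.
 */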
5915
5916 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5917 size_t cnt, loff_t *fpos)
5918 {
5919 struct seq_file *m = filp->private_data;
5920 struct trace_array *tr = m->private;
5921 char buf[64];
5922 const char *clockstr;
5923 int ret;
5924
5925 if (cnt >= sizeof(buf))
5926 return -EINVAL;
5927
5928 if (copy_from_user(buf, ubuf, cnt))
5929 return -EFAULT;
5930
5931 buf[cnt] = 0;
5932
5933 clockstr = strstrip(buf);
5934
5935 ret = tracing_set_clock(tr, clockstr);
5936 if (ret)
5937 return ret;
5938
5939 *fpos += cnt;
5940
5941 return cnt;
5942 }
5943
5944 static int tracing_clock_open(struct inode *inode, struct file *file)
5945 {
5946 struct trace_array *tr = inode->i_private;
5947 int ret;
5948
5949 if (tracing_disabled)
5950 return -ENODEV;
5951
5952 if (trace_array_get(tr))
5953 return -ENODEV;
5954
5955 ret = single_open(file, tracing_clock_show, inode->i_private);
5956 if (ret < 0)
5957 trace_array_put(tr);
5958
5959 return ret;
5960 }
5961
5962 struct ftrace_buffer_info {
5963 struct trace_iterator iter;
5964 void *spare;
5965 unsigned int read;
5966 };
5967
5968 #ifdef CONFIG_TRACER_SNAPSHOT
5969 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5970 {
5971 struct trace_array *tr = inode->i_private;
5972 struct trace_iterator *iter;
5973 struct seq_file *m;
5974 int ret = 0;
5975
5976 if (trace_array_get(tr) < 0)
5977 return -ENODEV;
5978
5979 if (file->f_mode & FMODE_READ) {
5980 iter = __tracing_open(inode, file, true);
5981 if (IS_ERR(iter))
5982 ret = PTR_ERR(iter);
5983 } else {
5984 /* Writes still need the seq_file to hold the private data */
5985 ret = -ENOMEM;
5986 m = kzalloc(sizeof(*m), GFP_KERNEL);
5987 if (!m)
5988 goto out;
5989 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5990 if (!iter) {
5991 kfree(m);
5992 goto out;
5993 }
5994 ret = 0;
5995
5996 iter->tr = tr;
5997 iter->trace_buffer = &tr->max_buffer;
5998 iter->cpu_file = tracing_get_cpu(inode);
5999 m->private = iter;
6000 file->private_data = m;
6001 }
6002 out:
6003 if (ret < 0)
6004 trace_array_put(tr);
6005
6006 return ret;
6007 }
6008
6009 static ssize_t
6010 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6011 loff_t *ppos)
6012 {
6013 struct seq_file *m = filp->private_data;
6014 struct trace_iterator *iter = m->private;
6015 struct trace_array *tr = iter->tr;
6016 unsigned long val;
6017 int ret;
6018
6019 ret = tracing_update_buffers();
6020 if (ret < 0)
6021 return ret;
6022
6023 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6024 if (ret)
6025 return ret;
6026
6027 mutex_lock(&trace_types_lock);
6028
6029 if (tr->current_trace->use_max_tr) {
6030 ret = -EBUSY;
6031 goto out;
6032 }
6033
6034 switch (val) {
6035 case 0:
6036 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6037 ret = -EINVAL;
6038 break;
6039 }
6040 if (tr->allocated_snapshot)
6041 free_snapshot(tr);
6042 break;
6043 case 1:
6044 /* Only allow per-cpu swap if the ring buffer supports it */
6045 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6046 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6047 ret = -EINVAL;
6048 break;
6049 }
6050 #endif
6051 if (!tr->allocated_snapshot) {
6052 ret = alloc_snapshot(tr);
6053 if (ret < 0)
6054 break;
6055 }
6056 local_irq_disable();
6057 /* Now, we're going to swap */
6058 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6059 update_max_tr(tr, current, smp_processor_id());
6060 else
6061 update_max_tr_single(tr, current, iter->cpu_file);
6062 local_irq_enable();
6063 break;
6064 default:
6065 if (tr->allocated_snapshot) {
6066 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6067 tracing_reset_online_cpus(&tr->max_buffer);
6068 else
6069 tracing_reset(&tr->max_buffer, iter->cpu_file);
6070 }
6071 break;
6072 }
6073
6074 if (ret >= 0) {
6075 *ppos += cnt;
6076 ret = cnt;
6077 }
6078 out:
6079 mutex_unlock(&trace_types_lock);
6080 return ret;
6081 }
6082
6083 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6084 {
6085 struct seq_file *m = file->private_data;
6086 int ret;
6087
6088 ret = tracing_release(inode, file);
6089
6090 if (file->f_mode & FMODE_READ)
6091 return ret;
6092
6093 /* If write only, the seq_file is just a stub */
6094 if (m)
6095 kfree(m->private);
6096 kfree(m);
6097
6098 return 0;
6099 }
6100
6101 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6102 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6103 size_t count, loff_t *ppos);
6104 static int tracing_buffers_release(struct inode *inode, struct file *file);
6105 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6106 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6107
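/*
 * snapshot_raw is the snapshot counterpart of trace_pipe_raw: it
 * reuses the tracing_buffers_*() handlers but points the iterator at
 * the snapshot (max) buffer instead of the live one.
 */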
6108 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6109 {
6110 struct ftrace_buffer_info *info;
6111 int ret;
6112
6113 ret = tracing_buffers_open(inode, filp);
6114 if (ret < 0)
6115 return ret;
6116
6117 info = filp->private_data;
6118
6119 if (info->iter.trace->use_max_tr) {
6120 tracing_buffers_release(inode, filp);
6121 return -EBUSY;
6122 }
6123
6124 info->iter.snapshot = true;
6125 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6126
6127 return ret;
6128 }
6129
6130 #endif /* CONFIG_TRACER_SNAPSHOT */
6131
6132
6133 static const struct file_operations tracing_thresh_fops = {
6134 .open = tracing_open_generic,
6135 .read = tracing_thresh_read,
6136 .write = tracing_thresh_write,
6137 .llseek = generic_file_llseek,
6138 };
6139
6140 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6141 static const struct file_operations tracing_max_lat_fops = {
6142 .open = tracing_open_generic,
6143 .read = tracing_max_lat_read,
6144 .write = tracing_max_lat_write,
6145 .llseek = generic_file_llseek,
6146 };
6147 #endif
6148
6149 static const struct file_operations set_tracer_fops = {
6150 .open = tracing_open_generic,
6151 .read = tracing_set_trace_read,
6152 .write = tracing_set_trace_write,
6153 .llseek = generic_file_llseek,
6154 };
6155
6156 static const struct file_operations tracing_pipe_fops = {
6157 .open = tracing_open_pipe,
6158 .poll = tracing_poll_pipe,
6159 .read = tracing_read_pipe,
6160 .splice_read = tracing_splice_read_pipe,
6161 .release = tracing_release_pipe,
6162 .llseek = no_llseek,
6163 };
6164
6165 static const struct file_operations tracing_entries_fops = {
6166 .open = tracing_open_generic_tr,
6167 .read = tracing_entries_read,
6168 .write = tracing_entries_write,
6169 .llseek = generic_file_llseek,
6170 .release = tracing_release_generic_tr,
6171 };
6172
6173 static const struct file_operations tracing_total_entries_fops = {
6174 .open = tracing_open_generic_tr,
6175 .read = tracing_total_entries_read,
6176 .llseek = generic_file_llseek,
6177 .release = tracing_release_generic_tr,
6178 };
6179
6180 static const struct file_operations tracing_free_buffer_fops = {
6181 .open = tracing_open_generic_tr,
6182 .write = tracing_free_buffer_write,
6183 .release = tracing_free_buffer_release,
6184 };
6185
6186 static const struct file_operations tracing_mark_fops = {
6187 .open = tracing_open_generic_tr,
6188 .write = tracing_mark_write,
6189 .llseek = generic_file_llseek,
6190 .release = tracing_release_generic_tr,
6191 };
6192
6193 static const struct file_operations tracing_mark_raw_fops = {
6194 .open = tracing_open_generic_tr,
6195 .write = tracing_mark_raw_write,
6196 .llseek = generic_file_llseek,
6197 .release = tracing_release_generic_tr,
6198 };
6199
6200 static const struct file_operations trace_clock_fops = {
6201 .open = tracing_clock_open,
6202 .read = seq_read,
6203 .llseek = seq_lseek,
6204 .release = tracing_single_release_tr,
6205 .write = tracing_clock_write,
6206 };
6207
6208 #ifdef CONFIG_TRACER_SNAPSHOT
6209 static const struct file_operations snapshot_fops = {
6210 .open = tracing_snapshot_open,
6211 .read = seq_read,
6212 .write = tracing_snapshot_write,
6213 .llseek = tracing_lseek,
6214 .release = tracing_snapshot_release,
6215 };
6216
6217 static const struct file_operations snapshot_raw_fops = {
6218 .open = snapshot_raw_open,
6219 .read = tracing_buffers_read,
6220 .release = tracing_buffers_release,
6221 .splice_read = tracing_buffers_splice_read,
6222 .llseek = no_llseek,
6223 };
6224
6225 #endif /* CONFIG_TRACER_SNAPSHOT */
6226
6227 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6228 {
6229 struct trace_array *tr = inode->i_private;
6230 struct ftrace_buffer_info *info;
6231 int ret;
6232
6233 if (tracing_disabled)
6234 return -ENODEV;
6235
6236 if (trace_array_get(tr) < 0)
6237 return -ENODEV;
6238
6239 info = kzalloc(sizeof(*info), GFP_KERNEL);
6240 if (!info) {
6241 trace_array_put(tr);
6242 return -ENOMEM;
6243 }
6244
6245 mutex_lock(&trace_types_lock);
6246
6247 info->iter.tr = tr;
6248 info->iter.cpu_file = tracing_get_cpu(inode);
6249 info->iter.trace = tr->current_trace;
6250 info->iter.trace_buffer = &tr->trace_buffer;
6251 info->spare = NULL;
6252 /* Force reading ring buffer for first read */
6253 info->read = (unsigned int)-1;
6254
6255 filp->private_data = info;
6256
6257 tr->current_trace->ref++;
6258
6259 mutex_unlock(&trace_types_lock);
6260
6261 ret = nonseekable_open(inode, filp);
6262 if (ret < 0)
6263 trace_array_put(tr);
6264
6265 return ret;
6266 }
6267
6268 static unsigned int
6269 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6270 {
6271 struct ftrace_buffer_info *info = filp->private_data;
6272 struct trace_iterator *iter = &info->iter;
6273
6274 return trace_poll(iter, filp, poll_table);
6275 }
6276
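/*
 * Read handler for trace_pipe_raw. Data is pulled out of the ring
 * buffer one page at a time into info->spare; info->read remembers the
 * offset into that spare page so a short read resumes where it left off.
 */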
6277 static ssize_t
6278 tracing_buffers_read(struct file *filp, char __user *ubuf,
6279 size_t count, loff_t *ppos)
6280 {
6281 struct ftrace_buffer_info *info = filp->private_data;
6282 struct trace_iterator *iter = &info->iter;
6283 ssize_t ret;
6284 ssize_t size;
6285
6286 if (!count)
6287 return 0;
6288
6289 #ifdef CONFIG_TRACER_MAX_TRACE
6290 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6291 return -EBUSY;
6292 #endif
6293
6294 if (!info->spare)
6295 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6296 iter->cpu_file);
6297 if (!info->spare)
6298 return -ENOMEM;
6299
6300 	/* Is there data left over from a previous read? */
6301 if (info->read < PAGE_SIZE)
6302 goto read;
6303
6304 again:
6305 trace_access_lock(iter->cpu_file);
6306 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6307 &info->spare,
6308 count,
6309 iter->cpu_file, 0);
6310 trace_access_unlock(iter->cpu_file);
6311
6312 if (ret < 0) {
6313 if (trace_empty(iter)) {
6314 if ((filp->f_flags & O_NONBLOCK))
6315 return -EAGAIN;
6316
6317 ret = wait_on_pipe(iter, false);
6318 if (ret)
6319 return ret;
6320
6321 goto again;
6322 }
6323 return 0;
6324 }
6325
6326 info->read = 0;
6327 read:
6328 size = PAGE_SIZE - info->read;
6329 if (size > count)
6330 size = count;
6331
6332 ret = copy_to_user(ubuf, info->spare + info->read, size);
6333 if (ret == size)
6334 return -EFAULT;
6335
6336 size -= ret;
6337
6338 *ppos += size;
6339 info->read += size;
6340
6341 return size;
6342 }
6343
6344 static int tracing_buffers_release(struct inode *inode, struct file *file)
6345 {
6346 struct ftrace_buffer_info *info = file->private_data;
6347 struct trace_iterator *iter = &info->iter;
6348
6349 mutex_lock(&trace_types_lock);
6350
6351 iter->tr->current_trace->ref--;
6352
6353 __trace_array_put(iter->tr);
6354
6355 if (info->spare)
6356 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6357 kfree(info);
6358
6359 mutex_unlock(&trace_types_lock);
6360
6361 return 0;
6362 }
6363
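/*
 * A buffer_ref is attached to each ring-buffer page spliced out via
 * trace_pipe_raw; it keeps the page alive until the last pipe buffer
 * referencing it has been released.
 */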
6364 struct buffer_ref {
6365 struct ring_buffer *buffer;
6366 void *page;
6367 int ref;
6368 };
6369
6370 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6371 struct pipe_buffer *buf)
6372 {
6373 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6374
6375 if (--ref->ref)
6376 return;
6377
6378 ring_buffer_free_read_page(ref->buffer, ref->page);
6379 kfree(ref);
6380 buf->private = 0;
6381 }
6382
6383 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6384 struct pipe_buffer *buf)
6385 {
6386 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6387
6388 ref->ref++;
6389 }
6390
6391 /* Pipe buffer operations for a buffer. */
6392 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6393 .can_merge = 0,
6394 .confirm = generic_pipe_buf_confirm,
6395 .release = buffer_pipe_buf_release,
6396 .steal = generic_pipe_buf_steal,
6397 .get = buffer_pipe_buf_get,
6398 };
6399
6400 /*
6401  * Callback from splice_to_pipe(): releases any pages left in the spd
6402  * in case we errored out while filling the pipe.
6403 */
6404 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6405 {
6406 struct buffer_ref *ref =
6407 (struct buffer_ref *)spd->partial[i].private;
6408
6409 if (--ref->ref)
6410 return;
6411
6412 ring_buffer_free_read_page(ref->buffer, ref->page);
6413 kfree(ref);
6414 spd->partial[i].private = 0;
6415 }
6416
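/*
 * Splice ring-buffer pages into a pipe without copying: each page is
 * wrapped in a buffer_ref and handed to splice_to_pipe(). Both *ppos
 * and len must be page aligned, which is why readers of trace_pipe_raw
 * (such as trace-cmd) typically consume it in PAGE_SIZE chunks.
 */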
6417 static ssize_t
6418 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6419 struct pipe_inode_info *pipe, size_t len,
6420 unsigned int flags)
6421 {
6422 struct ftrace_buffer_info *info = file->private_data;
6423 struct trace_iterator *iter = &info->iter;
6424 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6425 struct page *pages_def[PIPE_DEF_BUFFERS];
6426 struct splice_pipe_desc spd = {
6427 .pages = pages_def,
6428 .partial = partial_def,
6429 .nr_pages_max = PIPE_DEF_BUFFERS,
6430 .flags = flags,
6431 .ops = &buffer_pipe_buf_ops,
6432 .spd_release = buffer_spd_release,
6433 };
6434 struct buffer_ref *ref;
6435 int entries, size, i;
6436 ssize_t ret = 0;
6437
6438 #ifdef CONFIG_TRACER_MAX_TRACE
6439 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6440 return -EBUSY;
6441 #endif
6442
6443 if (*ppos & (PAGE_SIZE - 1))
6444 return -EINVAL;
6445
6446 if (len & (PAGE_SIZE - 1)) {
6447 if (len < PAGE_SIZE)
6448 return -EINVAL;
6449 len &= PAGE_MASK;
6450 }
6451
6452 if (splice_grow_spd(pipe, &spd))
6453 return -ENOMEM;
6454
6455 again:
6456 trace_access_lock(iter->cpu_file);
6457 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6458
6459 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6460 struct page *page;
6461 int r;
6462
6463 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6464 if (!ref) {
6465 ret = -ENOMEM;
6466 break;
6467 }
6468
6469 ref->ref = 1;
6470 ref->buffer = iter->trace_buffer->buffer;
6471 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6472 if (!ref->page) {
6473 ret = -ENOMEM;
6474 kfree(ref);
6475 break;
6476 }
6477
6478 r = ring_buffer_read_page(ref->buffer, &ref->page,
6479 len, iter->cpu_file, 1);
6480 if (r < 0) {
6481 ring_buffer_free_read_page(ref->buffer, ref->page);
6482 kfree(ref);
6483 break;
6484 }
6485
6486 /*
6487 		 * Zero out any leftover data; this page is going to
6488 		 * user land.
6489 */
6490 size = ring_buffer_page_len(ref->page);
6491 if (size < PAGE_SIZE)
6492 memset(ref->page + size, 0, PAGE_SIZE - size);
6493
6494 page = virt_to_page(ref->page);
6495
6496 spd.pages[i] = page;
6497 spd.partial[i].len = PAGE_SIZE;
6498 spd.partial[i].offset = 0;
6499 spd.partial[i].private = (unsigned long)ref;
6500 spd.nr_pages++;
6501 *ppos += PAGE_SIZE;
6502
6503 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6504 }
6505
6506 trace_access_unlock(iter->cpu_file);
6507 spd.nr_pages = i;
6508
6509 /* did we read anything? */
6510 if (!spd.nr_pages) {
6511 if (ret)
6512 goto out;
6513
6514 ret = -EAGAIN;
6515 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6516 goto out;
6517
6518 ret = wait_on_pipe(iter, true);
6519 if (ret)
6520 goto out;
6521
6522 goto again;
6523 }
6524
6525 ret = splice_to_pipe(pipe, &spd);
6526 out:
6527 splice_shrink_spd(&spd);
6528
6529 return ret;
6530 }
6531
6532 static const struct file_operations tracing_buffers_fops = {
6533 .open = tracing_buffers_open,
6534 .read = tracing_buffers_read,
6535 .poll = tracing_buffers_poll,
6536 .release = tracing_buffers_release,
6537 .splice_read = tracing_buffers_splice_read,
6538 .llseek = no_llseek,
6539 };
6540
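/*
 * Read handler for per_cpu/cpuN/stats: formats the counters gathered
 * below (entries, overrun, commit overrun, bytes, event timestamps,
 * dropped events, read events) for one CPU's ring buffer.
 */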
6541 static ssize_t
6542 tracing_stats_read(struct file *filp, char __user *ubuf,
6543 size_t count, loff_t *ppos)
6544 {
6545 struct inode *inode = file_inode(filp);
6546 struct trace_array *tr = inode->i_private;
6547 struct trace_buffer *trace_buf = &tr->trace_buffer;
6548 int cpu = tracing_get_cpu(inode);
6549 struct trace_seq *s;
6550 unsigned long cnt;
6551 unsigned long long t;
6552 unsigned long usec_rem;
6553
6554 s = kmalloc(sizeof(*s), GFP_KERNEL);
6555 if (!s)
6556 return -ENOMEM;
6557
6558 trace_seq_init(s);
6559
6560 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6561 trace_seq_printf(s, "entries: %ld\n", cnt);
6562
6563 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6564 trace_seq_printf(s, "overrun: %ld\n", cnt);
6565
6566 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6567 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6568
6569 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6570 trace_seq_printf(s, "bytes: %ld\n", cnt);
6571
6572 if (trace_clocks[tr->clock_id].in_ns) {
6573 /* local or global for trace_clock */
6574 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6575 usec_rem = do_div(t, USEC_PER_SEC);
6576 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6577 t, usec_rem);
6578
6579 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6580 usec_rem = do_div(t, USEC_PER_SEC);
6581 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6582 } else {
6583 /* counter or tsc mode for trace_clock */
6584 trace_seq_printf(s, "oldest event ts: %llu\n",
6585 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6586
6587 trace_seq_printf(s, "now ts: %llu\n",
6588 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6589 }
6590
6591 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6592 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6593
6594 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6595 trace_seq_printf(s, "read events: %ld\n", cnt);
6596
6597 count = simple_read_from_buffer(ubuf, count, ppos,
6598 s->buffer, trace_seq_used(s));
6599
6600 kfree(s);
6601
6602 return count;
6603 }
6604
6605 static const struct file_operations tracing_stats_fops = {
6606 .open = tracing_open_generic_tr,
6607 .read = tracing_stats_read,
6608 .llseek = generic_file_llseek,
6609 .release = tracing_release_generic_tr,
6610 };
6611
6612 #ifdef CONFIG_DYNAMIC_FTRACE
6613
6614 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6615 {
6616 return 0;
6617 }
6618
6619 static ssize_t
6620 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6621 size_t cnt, loff_t *ppos)
6622 {
6623 static char ftrace_dyn_info_buffer[1024];
6624 static DEFINE_MUTEX(dyn_info_mutex);
6625 unsigned long *p = filp->private_data;
6626 char *buf = ftrace_dyn_info_buffer;
6627 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6628 int r;
6629
6630 mutex_lock(&dyn_info_mutex);
6631 r = sprintf(buf, "%ld ", *p);
6632
6633 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6634 buf[r++] = '\n';
6635
6636 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6637
6638 mutex_unlock(&dyn_info_mutex);
6639
6640 return r;
6641 }
6642
6643 static const struct file_operations tracing_dyn_info_fops = {
6644 .open = tracing_open_generic,
6645 .read = tracing_read_dyn_info,
6646 .llseek = generic_file_llseek,
6647 };
6648 #endif /* CONFIG_DYNAMIC_FTRACE */
6649
6650 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6651 static void
6652 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6653 {
6654 tracing_snapshot();
6655 }
6656
6657 static void
6658 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6659 {
6660 	unsigned long *count = (unsigned long *)data;
6661
6662 if (!*count)
6663 return;
6664
6665 if (*count != -1)
6666 (*count)--;
6667
6668 tracing_snapshot();
6669 }
6670
6671 static int
6672 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6673 struct ftrace_probe_ops *ops, void *data)
6674 {
6675 long count = (long)data;
6676
6677 seq_printf(m, "%ps:", (void *)ip);
6678
6679 seq_puts(m, "snapshot");
6680
6681 if (count == -1)
6682 seq_puts(m, ":unlimited\n");
6683 else
6684 seq_printf(m, ":count=%ld\n", count);
6685
6686 return 0;
6687 }
6688
6689 static struct ftrace_probe_ops snapshot_probe_ops = {
6690 .func = ftrace_snapshot,
6691 .print = ftrace_snapshot_print,
6692 };
6693
6694 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6695 .func = ftrace_count_snapshot,
6696 .print = ftrace_snapshot_print,
6697 };
6698
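/*
 * Implements the "snapshot" command of set_ftrace_filter, e.g.
 * (the function name is just an illustration):
 *
 *	echo 'do_fork:snapshot:5' > set_ftrace_filter
 *
 * takes a snapshot on each of the first five hits of do_fork(), and
 *
 *	echo '!do_fork:snapshot' > set_ftrace_filter
 *
 * removes the probe again.
 */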
6699 static int
6700 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6701 char *glob, char *cmd, char *param, int enable)
6702 {
6703 struct ftrace_probe_ops *ops;
6704 void *count = (void *)-1;
6705 char *number;
6706 int ret;
6707
6708 /* hash funcs only work with set_ftrace_filter */
6709 if (!enable)
6710 return -EINVAL;
6711
6712 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6713
6714 if (glob[0] == '!') {
6715 unregister_ftrace_function_probe_func(glob+1, ops);
6716 return 0;
6717 }
6718
6719 if (!param)
6720 goto out_reg;
6721
6722 number = strsep(&param, ":");
6723
6724 if (!strlen(number))
6725 goto out_reg;
6726
6727 /*
6728 * We use the callback data field (which is a pointer)
6729 * as our counter.
6730 */
6731 ret = kstrtoul(number, 0, (unsigned long *)&count);
6732 if (ret)
6733 return ret;
6734
6735 out_reg:
6736 ret = register_ftrace_function_probe(glob, ops, count);
6737
6738 if (ret >= 0)
6739 alloc_snapshot(&global_trace);
6740
6741 return ret < 0 ? ret : 0;
6742 }
6743
6744 static struct ftrace_func_command ftrace_snapshot_cmd = {
6745 .name = "snapshot",
6746 .func = ftrace_trace_snapshot_callback,
6747 };
6748
6749 static __init int register_snapshot_cmd(void)
6750 {
6751 return register_ftrace_command(&ftrace_snapshot_cmd);
6752 }
6753 #else
6754 static inline __init int register_snapshot_cmd(void) { return 0; }
6755 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6756
6757 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6758 {
6759 if (WARN_ON(!tr->dir))
6760 return ERR_PTR(-ENODEV);
6761
6762 /* Top directory uses NULL as the parent */
6763 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6764 return NULL;
6765
6766 /* All sub buffers have a descriptor */
6767 return tr->dir;
6768 }
6769
6770 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6771 {
6772 struct dentry *d_tracer;
6773
6774 if (tr->percpu_dir)
6775 return tr->percpu_dir;
6776
6777 d_tracer = tracing_get_dentry(tr);
6778 if (IS_ERR(d_tracer))
6779 return NULL;
6780
6781 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6782
6783 WARN_ONCE(!tr->percpu_dir,
6784 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6785
6786 return tr->percpu_dir;
6787 }
6788
6789 static struct dentry *
6790 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6791 void *data, long cpu, const struct file_operations *fops)
6792 {
6793 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6794
6795 if (ret) /* See tracing_get_cpu() */
6796 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6797 return ret;
6798 }
6799
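/*
 * Create the per_cpu/cpuN directory for one CPU, mirroring the global
 * trace, trace_pipe, stats, etc. files at per-CPU scope (e.g.
 * per_cpu/cpu0/trace_pipe shows only CPU 0's events).
 */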
6800 static void
6801 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6802 {
6803 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6804 struct dentry *d_cpu;
6805 char cpu_dir[30]; /* 30 characters should be more than enough */
6806
6807 if (!d_percpu)
6808 return;
6809
6810 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6811 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6812 if (!d_cpu) {
6813 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6814 return;
6815 }
6816
6817 /* per cpu trace_pipe */
6818 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6819 tr, cpu, &tracing_pipe_fops);
6820
6821 /* per cpu trace */
6822 trace_create_cpu_file("trace", 0644, d_cpu,
6823 tr, cpu, &tracing_fops);
6824
6825 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6826 tr, cpu, &tracing_buffers_fops);
6827
6828 trace_create_cpu_file("stats", 0444, d_cpu,
6829 tr, cpu, &tracing_stats_fops);
6830
6831 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6832 tr, cpu, &tracing_entries_fops);
6833
6834 #ifdef CONFIG_TRACER_SNAPSHOT
6835 trace_create_cpu_file("snapshot", 0644, d_cpu,
6836 tr, cpu, &snapshot_fops);
6837
6838 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6839 tr, cpu, &snapshot_raw_fops);
6840 #endif
6841 }
6842
6843 #ifdef CONFIG_FTRACE_SELFTEST
6844 /* Let selftest have access to static functions in this file */
6845 #include "trace_selftest.c"
6846 #endif
6847
6848 static ssize_t
6849 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6850 loff_t *ppos)
6851 {
6852 struct trace_option_dentry *topt = filp->private_data;
6853 char *buf;
6854
6855 if (topt->flags->val & topt->opt->bit)
6856 buf = "1\n";
6857 else
6858 buf = "0\n";
6859
6860 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6861 }
6862
6863 static ssize_t
6864 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6865 loff_t *ppos)
6866 {
6867 struct trace_option_dentry *topt = filp->private_data;
6868 unsigned long val;
6869 int ret;
6870
6871 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6872 if (ret)
6873 return ret;
6874
6875 if (val != 0 && val != 1)
6876 return -EINVAL;
6877
6878 if (!!(topt->flags->val & topt->opt->bit) != val) {
6879 mutex_lock(&trace_types_lock);
6880 ret = __set_tracer_option(topt->tr, topt->flags,
6881 topt->opt, !val);
6882 mutex_unlock(&trace_types_lock);
6883 if (ret)
6884 return ret;
6885 }
6886
6887 *ppos += cnt;
6888
6889 return cnt;
6890 }
6891
6892
6893 static const struct file_operations trace_options_fops = {
6894 .open = tracing_open_generic,
6895 .read = trace_options_read,
6896 .write = trace_options_write,
6897 .llseek = generic_file_llseek,
6898 };
6899
6900 /*
6901  * In order to pass in both the trace_array descriptor as well as the index
6902  * of the flag that the trace option file represents, the trace_array
6903  * has a character array, trace_flags_index[], which holds the index
6904  * of the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
6905  * The address of an element of this character array is passed to the
6906  * flag option file read/write callbacks.
6907  *
6908  * In order to extract both the index and the trace_array descriptor,
6909  * get_tr_index() first dereferences the pointer:
6910  *
6911  *	idx = *ptr;
6912  *
6913  * This yields the index, because each element of the array holds its
6914  * own index (remember, index[1] == 1).
6915  *
6916  * Subtracting that index from the pointer then gives the start of
6917  * the array:
6918  *
6919  *	ptr - idx == &index[0]
6920  *
6921  * Finally, a simple container_of() from that pointer gets us to the
6922  * trace_array descriptor.
6923 */
6924 static void get_tr_index(void *data, struct trace_array **ptr,
6925 unsigned int *pindex)
6926 {
6927 *pindex = *(unsigned char *)data;
6928
6929 *ptr = container_of(data - *pindex, struct trace_array,
6930 trace_flags_index);
6931 }
6932
6933 static ssize_t
6934 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6935 loff_t *ppos)
6936 {
6937 void *tr_index = filp->private_data;
6938 struct trace_array *tr;
6939 unsigned int index;
6940 char *buf;
6941
6942 get_tr_index(tr_index, &tr, &index);
6943
6944 if (tr->trace_flags & (1 << index))
6945 buf = "1\n";
6946 else
6947 buf = "0\n";
6948
6949 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6950 }
6951
6952 static ssize_t
6953 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6954 loff_t *ppos)
6955 {
6956 void *tr_index = filp->private_data;
6957 struct trace_array *tr;
6958 unsigned int index;
6959 unsigned long val;
6960 int ret;
6961
6962 get_tr_index(tr_index, &tr, &index);
6963
6964 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6965 if (ret)
6966 return ret;
6967
6968 if (val != 0 && val != 1)
6969 return -EINVAL;
6970
6971 mutex_lock(&trace_types_lock);
6972 ret = set_tracer_flag(tr, 1 << index, val);
6973 mutex_unlock(&trace_types_lock);
6974
6975 if (ret < 0)
6976 return ret;
6977
6978 *ppos += cnt;
6979
6980 return cnt;
6981 }
6982
6983 static const struct file_operations trace_options_core_fops = {
6984 .open = tracing_open_generic,
6985 .read = trace_options_core_read,
6986 .write = trace_options_core_write,
6987 .llseek = generic_file_llseek,
6988 };
6989
6990 struct dentry *trace_create_file(const char *name,
6991 umode_t mode,
6992 struct dentry *parent,
6993 void *data,
6994 const struct file_operations *fops)
6995 {
6996 struct dentry *ret;
6997
6998 ret = tracefs_create_file(name, mode, parent, data, fops);
6999 if (!ret)
7000 pr_warn("Could not create tracefs '%s' entry\n", name);
7001
7002 return ret;
7003 }
7004
7005
7006 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7007 {
7008 struct dentry *d_tracer;
7009
7010 if (tr->options)
7011 return tr->options;
7012
7013 d_tracer = tracing_get_dentry(tr);
7014 if (IS_ERR(d_tracer))
7015 return NULL;
7016
7017 tr->options = tracefs_create_dir("options", d_tracer);
7018 if (!tr->options) {
7019 pr_warn("Could not create tracefs directory 'options'\n");
7020 return NULL;
7021 }
7022
7023 return tr->options;
7024 }
7025
7026 static void
7027 create_trace_option_file(struct trace_array *tr,
7028 struct trace_option_dentry *topt,
7029 struct tracer_flags *flags,
7030 struct tracer_opt *opt)
7031 {
7032 struct dentry *t_options;
7033
7034 t_options = trace_options_init_dentry(tr);
7035 if (!t_options)
7036 return;
7037
7038 topt->flags = flags;
7039 topt->opt = opt;
7040 topt->tr = tr;
7041
7042 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7043 &trace_options_fops);
7044
7045 }
7046
7047 static void
7048 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7049 {
7050 struct trace_option_dentry *topts;
7051 struct trace_options *tr_topts;
7052 struct tracer_flags *flags;
7053 struct tracer_opt *opts;
7054 int cnt;
7055 int i;
7056
7057 if (!tracer)
7058 return;
7059
7060 flags = tracer->flags;
7061
7062 if (!flags || !flags->opts)
7063 return;
7064
7065 /*
7066 * If this is an instance, only create flags for tracers
7067 * the instance may have.
7068 */
7069 if (!trace_ok_for_array(tracer, tr))
7070 return;
7071
7072 for (i = 0; i < tr->nr_topts; i++) {
7073 		/* Make sure there are no duplicate flags. */
7074 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7075 return;
7076 }
7077
7078 opts = flags->opts;
7079
7080 for (cnt = 0; opts[cnt].name; cnt++)
7081 ;
7082
7083 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7084 if (!topts)
7085 return;
7086
7087 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7088 GFP_KERNEL);
7089 if (!tr_topts) {
7090 kfree(topts);
7091 return;
7092 }
7093
7094 tr->topts = tr_topts;
7095 tr->topts[tr->nr_topts].tracer = tracer;
7096 tr->topts[tr->nr_topts].topts = topts;
7097 tr->nr_topts++;
7098
7099 for (cnt = 0; opts[cnt].name; cnt++) {
7100 create_trace_option_file(tr, &topts[cnt], flags,
7101 &opts[cnt]);
7102 WARN_ONCE(topts[cnt].entry == NULL,
7103 "Failed to create trace option: %s",
7104 opts[cnt].name);
7105 }
7106 }
7107
7108 static struct dentry *
7109 create_trace_option_core_file(struct trace_array *tr,
7110 const char *option, long index)
7111 {
7112 struct dentry *t_options;
7113
7114 t_options = trace_options_init_dentry(tr);
7115 if (!t_options)
7116 return NULL;
7117
7118 return trace_create_file(option, 0644, t_options,
7119 (void *)&tr->trace_flags_index[index],
7120 &trace_options_core_fops);
7121 }
7122
7123 static void create_trace_options_dir(struct trace_array *tr)
7124 {
7125 struct dentry *t_options;
7126 bool top_level = tr == &global_trace;
7127 int i;
7128
7129 t_options = trace_options_init_dentry(tr);
7130 if (!t_options)
7131 return;
7132
7133 for (i = 0; trace_options[i]; i++) {
7134 if (top_level ||
7135 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7136 create_trace_option_core_file(tr, trace_options[i], i);
7137 }
7138 }
7139
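/*
 * The "tracing_on" file: reading returns "0" or "1"; writing toggles
 * recording into the ring buffer without tearing down the current
 * tracer. For example, assuming the usual tracefs mount point:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 */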
7140 static ssize_t
7141 rb_simple_read(struct file *filp, char __user *ubuf,
7142 size_t cnt, loff_t *ppos)
7143 {
7144 struct trace_array *tr = filp->private_data;
7145 char buf[64];
7146 int r;
7147
7148 r = tracer_tracing_is_on(tr);
7149 r = sprintf(buf, "%d\n", r);
7150
7151 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7152 }
7153
7154 static ssize_t
7155 rb_simple_write(struct file *filp, const char __user *ubuf,
7156 size_t cnt, loff_t *ppos)
7157 {
7158 struct trace_array *tr = filp->private_data;
7159 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7160 unsigned long val;
7161 int ret;
7162
7163 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7164 if (ret)
7165 return ret;
7166
7167 if (buffer) {
7168 mutex_lock(&trace_types_lock);
7169 if (val) {
7170 tracer_tracing_on(tr);
7171 if (tr->current_trace->start)
7172 tr->current_trace->start(tr);
7173 } else {
7174 tracer_tracing_off(tr);
7175 if (tr->current_trace->stop)
7176 tr->current_trace->stop(tr);
7177 }
7178 mutex_unlock(&trace_types_lock);
7179 }
7180
7181 (*ppos)++;
7182
7183 return cnt;
7184 }
7185
7186 static const struct file_operations rb_simple_fops = {
7187 .open = tracing_open_generic_tr,
7188 .read = rb_simple_read,
7189 .write = rb_simple_write,
7190 .release = tracing_release_generic_tr,
7191 .llseek = default_llseek,
7192 };
7193
7194 struct dentry *trace_instance_dir;
7195
7196 static void
7197 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7198
7199 static int
7200 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7201 {
7202 enum ring_buffer_flags rb_flags;
7203
7204 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7205
7206 buf->tr = tr;
7207
7208 buf->buffer = ring_buffer_alloc(size, rb_flags);
7209 if (!buf->buffer)
7210 return -ENOMEM;
7211
7212 buf->data = alloc_percpu(struct trace_array_cpu);
7213 if (!buf->data) {
7214 ring_buffer_free(buf->buffer);
7215 return -ENOMEM;
7216 }
7217
7218 /* Allocate the first page for all buffers */
7219 set_buffer_entries(&tr->trace_buffer,
7220 ring_buffer_size(tr->trace_buffer.buffer, 0));
7221
7222 return 0;
7223 }
7224
7225 static int allocate_trace_buffers(struct trace_array *tr, int size)
7226 {
7227 int ret;
7228
7229 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7230 if (ret)
7231 return ret;
7232
7233 #ifdef CONFIG_TRACER_MAX_TRACE
7234 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7235 allocate_snapshot ? size : 1);
7236 if (WARN_ON(ret)) {
7237 ring_buffer_free(tr->trace_buffer.buffer);
7238 free_percpu(tr->trace_buffer.data);
7239 return -ENOMEM;
7240 }
7241 tr->allocated_snapshot = allocate_snapshot;
7242
7243 /*
7244 * Only the top level trace array gets its snapshot allocated
7245 * from the kernel command line.
7246 */
7247 allocate_snapshot = false;
7248 #endif
7249 return 0;
7250 }
7251
7252 static void free_trace_buffer(struct trace_buffer *buf)
7253 {
7254 if (buf->buffer) {
7255 ring_buffer_free(buf->buffer);
7256 buf->buffer = NULL;
7257 free_percpu(buf->data);
7258 buf->data = NULL;
7259 }
7260 }
7261
7262 static void free_trace_buffers(struct trace_array *tr)
7263 {
7264 if (!tr)
7265 return;
7266
7267 free_trace_buffer(&tr->trace_buffer);
7268
7269 #ifdef CONFIG_TRACER_MAX_TRACE
7270 free_trace_buffer(&tr->max_buffer);
7271 #endif
7272 }
7273
7274 static void init_trace_flags_index(struct trace_array *tr)
7275 {
7276 int i;
7277
7278 /* Used by the trace options files */
7279 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7280 tr->trace_flags_index[i] = i;
7281 }
7282
7283 static void __update_tracer_options(struct trace_array *tr)
7284 {
7285 struct tracer *t;
7286
7287 for (t = trace_types; t; t = t->next)
7288 add_tracer_options(tr, t);
7289 }
7290
7291 static void update_tracer_options(struct trace_array *tr)
7292 {
7293 mutex_lock(&trace_types_lock);
7294 __update_tracer_options(tr);
7295 mutex_unlock(&trace_types_lock);
7296 }
7297
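/*
 * Called by tracefs when a directory is created under "instances":
 * sets up a new trace_array with its own buffers and control files.
 * For example, assuming the usual tracefs mount point:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 */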
7298 static int instance_mkdir(const char *name)
7299 {
7300 struct trace_array *tr;
7301 int ret;
7302
7303 mutex_lock(&trace_types_lock);
7304
7305 ret = -EEXIST;
7306 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7307 if (tr->name && strcmp(tr->name, name) == 0)
7308 goto out_unlock;
7309 }
7310
7311 ret = -ENOMEM;
7312 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7313 if (!tr)
7314 goto out_unlock;
7315
7316 tr->name = kstrdup(name, GFP_KERNEL);
7317 if (!tr->name)
7318 goto out_free_tr;
7319
7320 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7321 goto out_free_tr;
7322
7323 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7324
7325 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7326
7327 raw_spin_lock_init(&tr->start_lock);
7328
7329 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7330
7331 tr->current_trace = &nop_trace;
7332
7333 INIT_LIST_HEAD(&tr->systems);
7334 INIT_LIST_HEAD(&tr->events);
7335
7336 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7337 goto out_free_tr;
7338
7339 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7340 if (!tr->dir)
7341 goto out_free_tr;
7342
7343 ret = event_trace_add_tracer(tr->dir, tr);
7344 if (ret) {
7345 tracefs_remove_recursive(tr->dir);
7346 goto out_free_tr;
7347 }
7348
7349 init_tracer_tracefs(tr, tr->dir);
7350 init_trace_flags_index(tr);
7351 __update_tracer_options(tr);
7352
7353 list_add(&tr->list, &ftrace_trace_arrays);
7354
7355 mutex_unlock(&trace_types_lock);
7356
7357 return 0;
7358
7359 out_free_tr:
7360 free_trace_buffers(tr);
7361 free_cpumask_var(tr->tracing_cpumask);
7362 kfree(tr->name);
7363 kfree(tr);
7364
7365 out_unlock:
7366 mutex_unlock(&trace_types_lock);
7367
7368 return ret;
7369
7370 }
7371
7372 static int instance_rmdir(const char *name)
7373 {
7374 struct trace_array *tr;
7375 int found = 0;
7376 int ret;
7377 int i;
7378
7379 mutex_lock(&trace_types_lock);
7380
7381 ret = -ENODEV;
7382 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7383 if (tr->name && strcmp(tr->name, name) == 0) {
7384 found = 1;
7385 break;
7386 }
7387 }
7388 if (!found)
7389 goto out_unlock;
7390
7391 ret = -EBUSY;
7392 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7393 goto out_unlock;
7394
7395 list_del(&tr->list);
7396
7397 /* Disable all the flags that were enabled coming in */
7398 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7399 if ((1 << i) & ZEROED_TRACE_FLAGS)
7400 set_tracer_flag(tr, 1 << i, 0);
7401 }
7402
7403 tracing_set_nop(tr);
7404 event_trace_del_tracer(tr);
7405 ftrace_destroy_function_files(tr);
7406 tracefs_remove_recursive(tr->dir);
7407 free_trace_buffers(tr);
7408
7409 for (i = 0; i < tr->nr_topts; i++) {
7410 kfree(tr->topts[i].topts);
7411 }
7412 kfree(tr->topts);
7413
7414 kfree(tr->name);
7415 kfree(tr);
7416
7417 ret = 0;
7418
7419 out_unlock:
7420 mutex_unlock(&trace_types_lock);
7421
7422 return ret;
7423 }
7424
7425 static __init void create_trace_instances(struct dentry *d_tracer)
7426 {
7427 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7428 instance_mkdir,
7429 instance_rmdir);
7430 if (WARN_ON(!trace_instance_dir))
7431 return;
7432 }
7433
7434 static void
7435 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7436 {
7437 int cpu;
7438
7439 trace_create_file("available_tracers", 0444, d_tracer,
7440 tr, &show_traces_fops);
7441
7442 trace_create_file("current_tracer", 0644, d_tracer,
7443 tr, &set_tracer_fops);
7444
7445 trace_create_file("tracing_cpumask", 0644, d_tracer,
7446 tr, &tracing_cpumask_fops);
7447
7448 trace_create_file("trace_options", 0644, d_tracer,
7449 tr, &tracing_iter_fops);
7450
7451 trace_create_file("trace", 0644, d_tracer,
7452 tr, &tracing_fops);
7453
7454 trace_create_file("trace_pipe", 0444, d_tracer,
7455 tr, &tracing_pipe_fops);
7456
7457 trace_create_file("buffer_size_kb", 0644, d_tracer,
7458 tr, &tracing_entries_fops);
7459
7460 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7461 tr, &tracing_total_entries_fops);
7462
7463 trace_create_file("free_buffer", 0200, d_tracer,
7464 tr, &tracing_free_buffer_fops);
7465
7466 trace_create_file("trace_marker", 0220, d_tracer,
7467 tr, &tracing_mark_fops);
7468
7469 trace_create_file("trace_marker_raw", 0220, d_tracer,
7470 tr, &tracing_mark_raw_fops);
7471
7472 trace_create_file("trace_clock", 0644, d_tracer, tr,
7473 &trace_clock_fops);
7474
7475 trace_create_file("tracing_on", 0644, d_tracer,
7476 tr, &rb_simple_fops);
7477
7478 create_trace_options_dir(tr);
7479
7480 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7481 trace_create_file("tracing_max_latency", 0644, d_tracer,
7482 &tr->max_latency, &tracing_max_lat_fops);
7483 #endif
7484
7485 if (ftrace_create_function_files(tr, d_tracer))
7486 WARN(1, "Could not allocate function filter files");
7487
7488 #ifdef CONFIG_TRACER_SNAPSHOT
7489 trace_create_file("snapshot", 0644, d_tracer,
7490 tr, &snapshot_fops);
7491 #endif
7492
7493 for_each_tracing_cpu(cpu)
7494 tracing_init_tracefs_percpu(tr, cpu);
7495
7496 ftrace_init_tracefs(tr, d_tracer);
7497 }
7498
7499 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7500 {
7501 struct vfsmount *mnt;
7502 struct file_system_type *type;
7503
7504 /*
7505 * To maintain backward compatibility for tools that mount
7506 * debugfs to get to the tracing facility, tracefs is automatically
7507 * mounted to the debugfs/tracing directory.
7508 */
7509 type = get_fs_type("tracefs");
7510 if (!type)
7511 return NULL;
7512 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7513 put_filesystem(type);
7514 if (IS_ERR(mnt))
7515 return NULL;
7516 mntget(mnt);
7517
7518 return mnt;
7519 }
7520
7521 /**
7522 * tracing_init_dentry - initialize top level trace array
7523 *
7524 * This is called when creating files or directories in the tracing
7525  * directory. It is called from the boot up code via fs_initcall(),
7526  * and is expected to return the dentry of the top level tracing directory.
7527 */
7528 struct dentry *tracing_init_dentry(void)
7529 {
7530 struct trace_array *tr = &global_trace;
7531
7532 /* The top level trace array uses NULL as parent */
7533 if (tr->dir)
7534 return NULL;
7535
7536 if (WARN_ON(!tracefs_initialized()) ||
7537 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7538 WARN_ON(!debugfs_initialized())))
7539 return ERR_PTR(-ENODEV);
7540
7541 /*
7542 * As there may still be users that expect the tracing
7543 * files to exist in debugfs/tracing, we must automount
7544 * the tracefs file system there, so older tools still
7545 	 * work with the newer kernel.
7546 */
7547 tr->dir = debugfs_create_automount("tracing", NULL,
7548 trace_automount, NULL);
7549 if (!tr->dir) {
7550 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7551 return ERR_PTR(-ENOMEM);
7552 }
7553
7554 return NULL;
7555 }
7556
7557 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7558 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7559
7560 static void __init trace_enum_init(void)
7561 {
7562 int len;
7563
7564 len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7565 trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7566 }
7567
7568 #ifdef CONFIG_MODULES
7569 static void trace_module_add_enums(struct module *mod)
7570 {
7571 if (!mod->num_trace_enums)
7572 return;
7573
7574 /*
7575 	 * Modules with bad taint do not have events created; do
7576 	 * not bother with enums either.
7577 */
7578 if (trace_module_has_bad_taint(mod))
7579 return;
7580
7581 trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7582 }
7583
7584 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7585 static void trace_module_remove_enums(struct module *mod)
7586 {
7587 union trace_enum_map_item *map;
7588 union trace_enum_map_item **last = &trace_enum_maps;
7589
7590 if (!mod->num_trace_enums)
7591 return;
7592
7593 mutex_lock(&trace_enum_mutex);
7594
7595 map = trace_enum_maps;
7596
7597 while (map) {
7598 if (map->head.mod == mod)
7599 break;
7600 map = trace_enum_jmp_to_tail(map);
7601 last = &map->tail.next;
7602 map = map->tail.next;
7603 }
7604 if (!map)
7605 goto out;
7606
7607 *last = trace_enum_jmp_to_tail(map)->tail.next;
7608 kfree(map);
7609 out:
7610 mutex_unlock(&trace_enum_mutex);
7611 }
7612 #else
7613 static inline void trace_module_remove_enums(struct module *mod) { }
7614 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7615
7616 static int trace_module_notify(struct notifier_block *self,
7617 unsigned long val, void *data)
7618 {
7619 struct module *mod = data;
7620
7621 switch (val) {
7622 case MODULE_STATE_COMING:
7623 trace_module_add_enums(mod);
7624 break;
7625 case MODULE_STATE_GOING:
7626 trace_module_remove_enums(mod);
7627 break;
7628 }
7629
7630 return 0;
7631 }
7632
7633 static struct notifier_block trace_module_nb = {
7634 .notifier_call = trace_module_notify,
7635 .priority = 0,
7636 };
7637 #endif /* CONFIG_MODULES */
7638
7639 static __init int tracer_init_tracefs(void)
7640 {
7641 struct dentry *d_tracer;
7642
7643 trace_access_lock_init();
7644
7645 d_tracer = tracing_init_dentry();
7646 if (IS_ERR(d_tracer))
7647 return 0;
7648
7649 init_tracer_tracefs(&global_trace, d_tracer);
7650 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7651
7652 trace_create_file("tracing_thresh", 0644, d_tracer,
7653 &global_trace, &tracing_thresh_fops);
7654
7655 trace_create_file("README", 0444, d_tracer,
7656 NULL, &tracing_readme_fops);
7657
7658 trace_create_file("saved_cmdlines", 0444, d_tracer,
7659 NULL, &tracing_saved_cmdlines_fops);
7660
7661 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7662 NULL, &tracing_saved_cmdlines_size_fops);
7663
7664 trace_enum_init();
7665
7666 trace_create_enum_file(d_tracer);
7667
7668 #ifdef CONFIG_MODULES
7669 register_module_notifier(&trace_module_nb);
7670 #endif
7671
7672 #ifdef CONFIG_DYNAMIC_FTRACE
7673 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7674 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7675 #endif
7676
7677 create_trace_instances(d_tracer);
7678
7679 update_tracer_options(&global_trace);
7680
7681 return 0;
7682 }
7683
7684 static int trace_panic_handler(struct notifier_block *this,
7685 unsigned long event, void *unused)
7686 {
7687 if (ftrace_dump_on_oops)
7688 ftrace_dump(ftrace_dump_on_oops);
7689 return NOTIFY_OK;
7690 }
7691
7692 static struct notifier_block trace_panic_notifier = {
7693 .notifier_call = trace_panic_handler,
7694 .next = NULL,
7695 .priority = 150 /* priority: INT_MAX >= x >= 0 */
7696 };
7697
7698 static int trace_die_handler(struct notifier_block *self,
7699 unsigned long val,
7700 void *data)
7701 {
7702 switch (val) {
7703 case DIE_OOPS:
7704 if (ftrace_dump_on_oops)
7705 ftrace_dump(ftrace_dump_on_oops);
7706 break;
7707 default:
7708 break;
7709 }
7710 return NOTIFY_OK;
7711 }
7712
7713 static struct notifier_block trace_die_notifier = {
7714 .notifier_call = trace_die_handler,
7715 .priority = 200
7716 };
7717
7718 /*
7719  * printk is set to a max of 1024; we really don't need it that big.
7720 * Nothing should be printing 1000 characters anyway.
7721 */
7722 #define TRACE_MAX_PRINT 1000
7723
7724 /*
7725 * Define here KERN_TRACE so that we have one place to modify
7726 * it if we decide to change what log level the ftrace dump
7727 * should be at.
7728 */
7729 #define KERN_TRACE KERN_EMERG
7730
7731 void
7732 trace_printk_seq(struct trace_seq *s)
7733 {
7734 /* Probably should print a warning here. */
7735 if (s->seq.len >= TRACE_MAX_PRINT)
7736 s->seq.len = TRACE_MAX_PRINT;
7737
7738 /*
7739 * More paranoid code. Although the buffer size is set to
7740 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7741 * an extra layer of protection.
7742 */
7743 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7744 s->seq.len = s->seq.size - 1;
7745
7746 	/* Should be zero terminated already, but we are paranoid. */
7747 s->buffer[s->seq.len] = 0;
7748
7749 printk(KERN_TRACE "%s", s->buffer);
7750
7751 trace_seq_init(s);
7752 }
7753
7754 void trace_init_global_iter(struct trace_iterator *iter)
7755 {
7756 iter->tr = &global_trace;
7757 iter->trace = iter->tr->current_trace;
7758 iter->cpu_file = RING_BUFFER_ALL_CPUS;
7759 iter->trace_buffer = &global_trace.trace_buffer;
7760
7761 if (iter->trace && iter->trace->open)
7762 iter->trace->open(iter);
7763
7764 /* Annotate start of buffers if we had overruns */
7765 if (ring_buffer_overruns(iter->trace_buffer->buffer))
7766 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7767
7768 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7769 if (trace_clocks[iter->tr->clock_id].in_ns)
7770 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7771 }
7772
7773 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7774 {
7775 /* use static because iter can be a bit big for the stack */
7776 static struct trace_iterator iter;
7777 static atomic_t dump_running;
7778 struct trace_array *tr = &global_trace;
7779 unsigned int old_userobj;
7780 unsigned long flags;
7781 int cnt = 0, cpu;
7782
7783 /* Only allow one dump user at a time. */
7784 if (atomic_inc_return(&dump_running) != 1) {
7785 atomic_dec(&dump_running);
7786 return;
7787 }
7788
7789 /*
7790 * Always turn off tracing when we dump.
7791 * We don't need to show trace output of what happens
7792 * between multiple crashes.
7793 *
7794 * If the user does a sysrq-z, then they can re-enable
7795 * tracing with echo 1 > tracing_on.
7796 */
7797 tracing_off();
7798
7799 local_irq_save(flags);
7800
7801 /* Simulate the iterator */
7802 trace_init_global_iter(&iter);
7803
7804 for_each_tracing_cpu(cpu) {
7805 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7806 }
7807
7808 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7809
7810 /* don't look at user memory in panic mode */
7811 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7812
7813 switch (oops_dump_mode) {
7814 case DUMP_ALL:
7815 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7816 break;
7817 case DUMP_ORIG:
7818 iter.cpu_file = raw_smp_processor_id();
7819 break;
7820 case DUMP_NONE:
7821 goto out_enable;
7822 default:
7823 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7824 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7825 }
7826
7827 printk(KERN_TRACE "Dumping ftrace buffer:\n");
7828
7829 /* Did function tracer already get disabled? */
7830 if (ftrace_is_dead()) {
7831 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7832 printk("# MAY BE MISSING FUNCTION EVENTS\n");
7833 }
7834
7835 /*
7836 	 * We need to stop all tracing on all CPUs to read
7837 	 * the next buffer. This is a bit expensive, but is
7838 	 * not done often. We read everything we can,
7839 * and then release the locks again.
7840 */
7841
7842 while (!trace_empty(&iter)) {
7843
7844 if (!cnt)
7845 printk(KERN_TRACE "---------------------------------\n");
7846
7847 cnt++;
7848
7849 /* reset all but tr, trace, and overruns */
7850 memset(&iter.seq, 0,
7851 sizeof(struct trace_iterator) -
7852 offsetof(struct trace_iterator, seq));
7853 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7854 iter.pos = -1;
7855
7856 if (trace_find_next_entry_inc(&iter) != NULL) {
7857 int ret;
7858
7859 ret = print_trace_line(&iter);
7860 if (ret != TRACE_TYPE_NO_CONSUME)
7861 trace_consume(&iter);
7862 }
7863 touch_nmi_watchdog();
7864
7865 trace_printk_seq(&iter.seq);
7866 }
7867
7868 if (!cnt)
7869 printk(KERN_TRACE " (ftrace buffer empty)\n");
7870 else
7871 printk(KERN_TRACE "---------------------------------\n");
7872
7873 out_enable:
7874 tr->trace_flags |= old_userobj;
7875
7876 for_each_tracing_cpu(cpu) {
7877 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7878 }
7879 atomic_dec(&dump_running);
7880 local_irq_restore(flags);
7881 }
7882 EXPORT_SYMBOL_GPL(ftrace_dump);
7883
7884 __init static int tracer_alloc_buffers(void)
7885 {
7886 int ring_buf_size;
7887 int ret = -ENOMEM;
7888
7889 /*
7890 	 * Make sure we don't accidentally add more trace options
7891 * than we have bits for.
7892 */
7893 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7894
7895 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7896 goto out;
7897
7898 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7899 goto out_free_buffer_mask;
7900
7901 /* Only allocate trace_printk buffers if a trace_printk exists */
7902 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7903 /* Must be called before global_trace.buffer is allocated */
7904 trace_printk_init_buffers();
7905
7906 	/* To save memory, keep the ring buffer size at its minimum */
7907 if (ring_buffer_expanded)
7908 ring_buf_size = trace_buf_size;
7909 else
7910 ring_buf_size = 1;
7911
7912 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7913 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7914
7915 raw_spin_lock_init(&global_trace.start_lock);
7916
7917 /*
7918 	 * The prepare callback allocates some memory for the ring buffer. We
7919 	 * don't free the buffer if the CPU goes down. If we were to free
7920 * the buffer, then the user would lose any trace that was in the
7921 * buffer. The memory will be removed once the "instance" is removed.
7922 */
7923 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
7924 				      "trace/RB:prepare", trace_rb_cpu_prepare,
7925 NULL);
7926 if (ret < 0)
7927 goto out_free_cpumask;
7928 /* Used for event triggers */
7929 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7930 if (!temp_buffer)
7931 goto out_rm_hp_state;
7932
7933 if (trace_create_savedcmd() < 0)
7934 goto out_free_temp_buffer;
7935
7936 	/* TODO: make the number of buffers hot pluggable with CPUs */
7937 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7938 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7939 WARN_ON(1);
7940 goto out_free_savedcmd;
7941 }
7942
7943 if (global_trace.buffer_disabled)
7944 tracing_off();
7945
7946 if (trace_boot_clock) {
7947 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7948 if (ret < 0)
7949 pr_warn("Trace clock %s not defined, going back to default\n",
7950 trace_boot_clock);
7951 }
7952
7953 /*
7954 * register_tracer() might reference current_trace, so it
7955 * needs to be set before we register anything. This is
7956 * just a bootstrap of current_trace anyway.
7957 */
7958 global_trace.current_trace = &nop_trace;
7959
7960 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7961
7962 ftrace_init_global_array_ops(&global_trace);
7963
7964 init_trace_flags_index(&global_trace);
7965
7966 register_tracer(&nop_trace);
7967
7968 /* All seems OK, enable tracing */
7969 tracing_disabled = 0;
7970
7971 atomic_notifier_chain_register(&panic_notifier_list,
7972 &trace_panic_notifier);
7973
7974 register_die_notifier(&trace_die_notifier);
7975
7976 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7977
7978 INIT_LIST_HEAD(&global_trace.systems);
7979 INIT_LIST_HEAD(&global_trace.events);
7980 list_add(&global_trace.list, &ftrace_trace_arrays);
7981
7982 apply_trace_boot_options();
7983
7984 register_snapshot_cmd();
7985
7986 return 0;
7987
7988 out_free_savedcmd:
7989 free_saved_cmdlines_buffer(savedcmd);
7990 out_free_temp_buffer:
7991 ring_buffer_free(temp_buffer);
7992 out_rm_hp_state:
7993 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
7994 out_free_cpumask:
7995 free_cpumask_var(global_trace.tracing_cpumask);
7996 out_free_buffer_mask:
7997 free_cpumask_var(tracing_buffer_mask);
7998 out:
7999 return ret;
8000 }
8001
8002 void __init trace_init(void)
8003 {
8004 if (tracepoint_printk) {
8005 tracepoint_print_iter =
8006 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8007 if (WARN_ON(!tracepoint_print_iter))
8008 tracepoint_printk = 0;
8009 else
8010 static_key_enable(&tracepoint_printk_key.key);
8011 }
8012 tracer_alloc_buffers();
8013 trace_event_init();
8014 }
8015
8016 __init static int clear_boot_tracer(void)
8017 {
8018 /*
8019 	 * The default bootup tracer name points into an init section that
8020 	 * is freed after boot. This function runs as a late initcall; if
8021 	 * the boot tracer was never registered by then, clear the pointer
8022 	 * to prevent a later registration from accessing memory that is
8023 	 * about to be freed.
8024 */
8025 if (!default_bootup_tracer)
8026 return 0;
8027
8028 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8029 default_bootup_tracer);
8030 default_bootup_tracer = NULL;
8031
8032 return 0;
8033 }
8034
8035 fs_initcall(tracer_init_tracefs);
8036 late_initcall(clear_boot_tracer);