kernel/trace/trace.c
1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
52 */
53 bool ring_buffer_expanded;
54
55 /*
56 * We need to change this state when a selftest is running.
57 * A selftest peeks into the ring buffer to count the entries
58 * inserted during the selftest, but concurrent insertions into the
59 * ring buffer, such as from trace_printk(), could occur at the same
60 * time, giving false positive or negative results.
61 */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65 * If a tracer is running, we do not want to run SELFTEST.
66 */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 return 0;
83 }
84
85 /*
86 * To prevent the comm cache from being overwritten when no
87 * tracing is active, only save the comm when a trace event
88 * occurred.
89 */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93 * Kill all tracing for good (never come back).
94 * It is initialized to 1 but will turn to zero if the initialization
95 * of the tracer is successful. But that is the only place that sets
96 * this back to zero.
97 */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly tracing_buffer_mask;
101
102 /*
103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104 *
105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106 * is set, then ftrace_dump is called. This will output the contents
107 * of the ftrace buffers to the console. This is very useful for
108 * capturing traces that lead to crashes and outputting them to a
109 * serial console.
110 *
111 * It is off by default, but you can enable it either by specifying
112 * "ftrace_dump_on_oops" on the kernel command line, or by setting
113 * /proc/sys/kernel/ftrace_dump_on_oops.
114 * Set it to 1 to dump the buffers of all CPUs.
115 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116 */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
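
/*
 * Illustrative usage sketch (not part of the original file): with the
 * boot-time and run-time knobs handled below, the dump mode could be
 * selected like:
 *
 *     ftrace_dump_on_oops                             (command line, DUMP_ALL)
 *     ftrace_dump_on_oops=orig_cpu                    (command line, DUMP_ORIG)
 *     echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   (at run time)
 */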
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 struct module *mod;
127 unsigned long length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133 /*
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "eval_string"
136 */
137 union trace_eval_map_item *next;
138 const char *end; /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144 * The trace_eval_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved eval_map items.
149 */
150 union trace_eval_map_item {
151 struct trace_eval_map map;
152 struct trace_eval_map_head head;
153 struct trace_eval_map_tail tail;
154 };
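
/*
 * Illustrative layout sketch (not part of the original file) of one saved
 * array, following the description above:
 *
 *     [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next, end=NULL ]
 *
 * where "tail.next" chains to the head item of the next saved array, if any.
 */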
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE 100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 default_bootup_tracer = bootup_tracer_buf;
171 /* We are using ftrace early, expand it */
172 ring_buffer_expanded = true;
173 return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 if (*str++ != '=' || !*str) {
180 ftrace_dump_on_oops = DUMP_ALL;
181 return 1;
182 }
183
184 if (!strcmp("orig_cpu", str)) {
185 ftrace_dump_on_oops = DUMP_ORIG;
186 return 1;
187 }
188
189 return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 __disable_trace_on_warning = 1;
197 return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 allocate_snapshot = true;
204 /* We also need the main ring buffer expanded */
205 ring_buffer_expanded = true;
206 return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 trace_boot_clock = trace_boot_clock_buf;
227 return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 tracepoint_printk = 1;
235 return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 nsec += 500;
242 do_div(nsec, 1000);
243 return nsec;
244 }
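
/*
 * For example (illustrative): ns2usecs(1499) == 1 and ns2usecs(1500) == 2;
 * the +500 rounds to the nearest microsecond before dividing by 1000.
 */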
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS \
248 (FUNCTION_DEFAULT_FLAGS | \
249 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
250 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
251 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
252 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
256 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263 * The global_trace is the descriptor that holds the top-level tracing
264 * buffers for the live tracing.
265 */
266 static struct trace_array global_trace = {
267 .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 struct trace_array *tr;
275 int ret = -ENODEV;
276
277 mutex_lock(&trace_types_lock);
278 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 if (tr == this_tr) {
280 tr->ref++;
281 ret = 0;
282 break;
283 }
284 }
285 mutex_unlock(&trace_types_lock);
286
287 return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 WARN_ON(!this_tr->ref);
293 this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 mutex_lock(&trace_types_lock);
299 __trace_array_put(this_tr);
300 mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 struct ring_buffer *buffer,
305 struct ring_buffer_event *event)
306 {
307 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 !filter_match_preds(call->filter, rec)) {
309 __trace_event_discard_commit(buffer, event);
310 return 1;
311 }
312
313 return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 vfree(pid_list->pids);
319 kfree(pid_list);
320 }
321
322 /**
323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324 * @filtered_pids: The list of pids to check
325 * @search_pid: The PID to find in @filtered_pids
326 *
327 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328 */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 /*
333 * If pid_max changed after filtered_pids was created, we
334 * by default ignore all pids greater than the previous pid_max.
335 */
336 if (search_pid >= filtered_pids->pid_max)
337 return false;
338
339 return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343 * trace_ignore_this_task - should a task be ignored for tracing
344 * @filtered_pids: The list of pids to check
345 * @task: The task that should be ignored if not filtered
346 *
347 * Checks if @task should be traced or not from @filtered_pids.
348 * Returns true if @task should *NOT* be traced.
349 * Returns false if @task should be traced.
350 */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 /*
355 * Return false, because if filtered_pids does not exist,
356 * all pids are good to trace.
357 */
358 if (!filtered_pids)
359 return false;
360
361 return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
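
/*
 * Illustrative caller sketch (assumed, not from the original file): a
 * tracing hook would typically consult the filter before recording:
 *
 *     if (trace_ignore_this_task(pid_list, current))
 *             return;
 *
 * i.e. the task is filtered out and the event is not recorded.
 */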
363
364 /**
365 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove
369 *
370 * If adding a task, if @self is defined, the task is only added if @self
371 * is also included in @pid_list. This happens on fork and tasks should
372 * only be added when the parent is listed. If @self is NULL, then the
373 * @task pid will be removed from the list, which would happen on exit
374 * of a task.
375 */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 struct task_struct *self,
378 struct task_struct *task)
379 {
380 if (!pid_list)
381 return;
382
383 /* For forks, we only add if the forking task is listed */
384 if (self) {
385 if (!trace_find_filtered_pid(pid_list, self->pid))
386 return;
387 }
388
389 /* Sorry, but we don't support pid_max changing after setting */
390 if (task->pid >= pid_list->pid_max)
391 return;
392
393 /* "self" is set for forks, and NULL for exits */
394 if (self)
395 set_bit(task->pid, pid_list->pids);
396 else
397 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402 * @pid_list: The pid list to show
403 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404 * @pos: The position of the file
405 *
406 * This is used by the seq_file "next" operation to iterate the pids
407 * listed in a trace_pid_list structure.
408 *
409 * Returns the pid+1 as we want to display pid of zero, but NULL would
410 * stop the iteration.
411 */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 unsigned long pid = (unsigned long)v;
415
416 (*pos)++;
417
418 /* pid is already +1 of the actual previous bit */
419 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421 /* Return pid + 1 to allow zero to be represented */
422 if (pid < pid_list->pid_max)
423 return (void *)(pid + 1);
424
425 return NULL;
426 }
427
428 /**
429 * trace_pid_start - Used for seq_file to start reading pid lists
430 * @pid_list: The pid list to show
431 * @pos: The position of the file
432 *
433 * This is used by seq_file "start" operation to start the iteration
434 * of listing pids.
435 *
436 * Returns the pid+1 as we want to display pid of zero, but NULL would
437 * stop the iteration.
438 */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 unsigned long pid;
442 loff_t l = 0;
443
444 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 if (pid >= pid_list->pid_max)
446 return NULL;
447
448 /* Return pid + 1 so that zero can be the exit value */
449 for (pid++; pid && l < *pos;
450 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 ;
452 return (void *)pid;
453 }
454
455 /**
456 * trace_pid_show - show the current pid in seq_file processing
457 * @m: The seq_file structure to write into
458 * @v: A void pointer of the pid (+1) value to display
459 *
460 * Can be directly used by seq_file operations to display the current
461 * pid value.
462 */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 unsigned long pid = (unsigned long)v - 1;
466
467 seq_printf(m, "%lu\n", pid);
468 return 0;
469 }
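
/*
 * Illustrative sketch (assumed, not from the original file) of how the
 * helpers above plug into a seq_file interface; the example_* wrappers are
 * hypothetical and would pass the caller's own pid_list through:
 *
 *     static const struct seq_operations example_pid_seq_ops = {
 *             .start  = example_pid_start,    (calls trace_pid_start())
 *             .next   = example_pid_next,     (calls trace_pid_next())
 *             .stop   = example_pid_stop,
 *             .show   = trace_pid_show,
 *     };
 */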
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE 127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 struct trace_pid_list **new_pid_list,
476 const char __user *ubuf, size_t cnt)
477 {
478 struct trace_pid_list *pid_list;
479 struct trace_parser parser;
480 unsigned long val;
481 int nr_pids = 0;
482 ssize_t read = 0;
483 ssize_t ret = 0;
484 loff_t pos;
485 pid_t pid;
486
487 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 return -ENOMEM;
489
490 /*
491 * Always recreate a new array. The write is an all-or-nothing
492 * operation: a new array is always created when the user adds new
493 * pids, and if the operation fails, the current list is left
494 * unmodified.
495 */
496 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 if (!pid_list)
498 return -ENOMEM;
499
500 pid_list->pid_max = READ_ONCE(pid_max);
501
502 /* Only truncating will shrink pid_max */
503 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 pid_list->pid_max = filtered_pids->pid_max;
505
506 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 if (!pid_list->pids) {
508 kfree(pid_list);
509 return -ENOMEM;
510 }
511
512 if (filtered_pids) {
513 /* copy the current bits to the new max */
514 for_each_set_bit(pid, filtered_pids->pids,
515 filtered_pids->pid_max) {
516 set_bit(pid, pid_list->pids);
517 nr_pids++;
518 }
519 }
520
521 while (cnt > 0) {
522
523 pos = 0;
524
525 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 if (ret < 0 || !trace_parser_loaded(&parser))
527 break;
528
529 read += ret;
530 ubuf += ret;
531 cnt -= ret;
532
533 parser.buffer[parser.idx] = 0;
534
535 ret = -EINVAL;
536 if (kstrtoul(parser.buffer, 0, &val))
537 break;
538 if (val >= pid_list->pid_max)
539 break;
540
541 pid = (pid_t)val;
542
543 set_bit(pid, pid_list->pids);
544 nr_pids++;
545
546 trace_parser_clear(&parser);
547 ret = 0;
548 }
549 trace_parser_put(&parser);
550
551 if (ret < 0) {
552 trace_free_pid_list(pid_list);
553 return ret;
554 }
555
556 if (!nr_pids) {
557 /* Cleared the list of pids */
558 trace_free_pid_list(pid_list);
559 read = ret;
560 pid_list = NULL;
561 }
562
563 *new_pid_list = pid_list;
564
565 return read;
566 }
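
/*
 * Illustrative usage (assumed, not from the original file): tracefs files
 * built on top of trace_pid_write(), such as set_event_pid, take a
 * space-separated pid list from user space, e.g.:
 *
 *     echo "123 456" > /sys/kernel/debug/tracing/set_event_pid
 *
 * If the resulting list ends up with no pids, the filter is cleared
 * (the new pid_list is freed and *new_pid_list is set to NULL).
 */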
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 u64 ts;
571
572 /* Early boot up does not have a buffer yet */
573 if (!buf->buffer)
574 return trace_clock_local();
575
576 ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579 return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588 * tracing_is_enabled - Show if global_trace has been disabled
589 *
590 * Shows if the global trace has been enabled or not. It uses the
591 * mirror flag "buffer_disabled" to be used in fast paths such as for
592 * the irqsoff tracer. But it may be inaccurate due to races. If you
593 * need to know the accurate state, use tracing_is_on() which is a little
594 * slower, but accurate.
595 */
596 int tracing_is_enabled(void)
597 {
598 /*
599 * For quick access (irqsoff uses this in fast path), just
600 * return the mirror variable of the state of the ring buffer.
601 * It's a little racy, but we don't really care.
602 */
603 smp_rmb();
604 return !global_trace.buffer_disabled;
605 }
606
607 /*
608 * trace_buf_size is the size in bytes that is allocated
609 * for a buffer. Note, the number of bytes is always rounded
610 * to page size.
611 *
612 * This number is purposely set to a low value (16384 entries) so
613 * that if a dump on oops happens, there is not an excessive amount
614 * of output to wait for. It is configurable at both boot time and
615 * run time anyway.
616 */
617 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer *trace_types __read_mostly;
623
624 /*
625 * trace_types_lock is used to protect the trace_types list.
626 */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630 * serialize the access of the ring buffer
631 *
632 * The ring buffer serializes readers, but that is only low-level
633 * protection. The validity of the events (returned by
634 * ring_buffer_peek() etc.) is not protected by the ring buffer.
635 *
636 * The content of events may become garbage if we allow other
637 * processes to consume these events concurrently:
638 * A) the page of the consumed events may become a normal page
639 * (not a reader page) in the ring buffer, and this page will be
640 * rewritten by the event producer.
641 * B) the page of the consumed events may become a page for
642 * splice_read, and this page will be returned to the system.
643 *
644 * These primitives allow multi-process access to different per-cpu
645 * ring buffers concurrently.
646 *
647 * These primitives don't distinguish read-only and read-consume access.
648 * Multiple read-only accesses are also serialized.
649 */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657 if (cpu == RING_BUFFER_ALL_CPUS) {
658 /* gain it for accessing the whole ring buffer. */
659 down_write(&all_cpu_access_lock);
660 } else {
661 /* gain it for accessing a cpu ring buffer. */
662
663 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 down_read(&all_cpu_access_lock);
665
666 /* Secondly block other access to this @cpu ring buffer. */
667 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673 if (cpu == RING_BUFFER_ALL_CPUS) {
674 up_write(&all_cpu_access_lock);
675 } else {
676 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 up_read(&all_cpu_access_lock);
678 }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683 int cpu;
684
685 for_each_possible_cpu(cpu)
686 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695 (void)cpu;
696 mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701 (void)cpu;
702 mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
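
/*
 * Illustrative usage sketch (assumed, not from the original file) of the
 * access primitives above around a per-cpu read of the ring buffer:
 *
 *     trace_access_lock(cpu);
 *     ... consume events from the cpu's ring buffer ...
 *     trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead locks out all per-cpu readers.
 */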
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 unsigned long flags,
714 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
717 unsigned long flags,
718 int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 unsigned long flags,
723 int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
738 {
739 struct trace_entry *ent = ring_buffer_event_data(event);
740
741 tracing_generic_entry_update(ent, flags, pc);
742 ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 int type,
748 unsigned long len,
749 unsigned long flags, int pc)
750 {
751 struct ring_buffer_event *event;
752
753 event = ring_buffer_lock_reserve(buffer, len);
754 if (event != NULL)
755 trace_event_setup(event, type, flags, pc);
756
757 return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 if (tr->trace_buffer.buffer)
763 ring_buffer_record_on(tr->trace_buffer.buffer);
764 /*
765 * This flag is looked at when buffers haven't been allocated
766 * yet, or by some tracers (like irqsoff), that just want to
767 * know if the ring buffer has been disabled, but it can handle
768 * races of where it gets disabled but we still do a record.
769 * As the check is in the fast path of the tracers, it is more
770 * important to be fast than accurate.
771 */
772 tr->buffer_disabled = 0;
773 /* Make the flag seen by readers */
774 smp_wmb();
775 }
776
777 /**
778 * tracing_on - enable tracing buffers
779 *
780 * This function enables tracing buffers that may have been
781 * disabled with tracing_off.
782 */
783 void tracing_on(void)
784 {
785 tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 __this_cpu_write(trace_taskinfo_save, true);
794
795 /* If this is the temp buffer, we need to commit fully */
796 if (this_cpu_read(trace_buffered_event) == event) {
797 /* Length is in event->array[0] */
798 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 /* Release the temp buffer */
800 this_cpu_dec(trace_buffered_event_cnt);
801 } else
802 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806 * __trace_puts - write a constant string into the trace buffer.
807 * @ip: The address of the caller
808 * @str: The constant string to write
809 * @size: The size of the string.
810 */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 struct ring_buffer_event *event;
814 struct ring_buffer *buffer;
815 struct print_entry *entry;
816 unsigned long irq_flags;
817 int alloc;
818 int pc;
819
820 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 return 0;
822
823 pc = preempt_count();
824
825 if (unlikely(tracing_selftest_running || tracing_disabled))
826 return 0;
827
828 alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830 local_save_flags(irq_flags);
831 buffer = global_trace.trace_buffer.buffer;
832 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 irq_flags, pc);
834 if (!event)
835 return 0;
836
837 entry = ring_buffer_event_data(event);
838 entry->ip = ip;
839
840 memcpy(&entry->buf, str, size);
841
842 /* Add a newline if necessary */
843 if (entry->buf[size - 1] != '\n') {
844 entry->buf[size] = '\n';
845 entry->buf[size + 1] = '\0';
846 } else
847 entry->buf[size] = '\0';
848
849 __buffer_unlock_commit(buffer, event);
850 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852 return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857 * __trace_bputs - write the pointer to a constant string into trace buffer
858 * @ip: The address of the caller
859 * @str: The constant string to write to the buffer to
860 */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 struct ring_buffer_event *event;
864 struct ring_buffer *buffer;
865 struct bputs_entry *entry;
866 unsigned long irq_flags;
867 int size = sizeof(struct bputs_entry);
868 int pc;
869
870 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 return 0;
872
873 pc = preempt_count();
874
875 if (unlikely(tracing_selftest_running || tracing_disabled))
876 return 0;
877
878 local_save_flags(irq_flags);
879 buffer = global_trace.trace_buffer.buffer;
880 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 irq_flags, pc);
882 if (!event)
883 return 0;
884
885 entry = ring_buffer_event_data(event);
886 entry->ip = ip;
887 entry->str = str;
888
889 __buffer_unlock_commit(buffer, event);
890 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892 return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
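
/*
 * Illustrative usage (assumed, not from the original file): kernel code
 * normally reaches __trace_puts()/__trace_bputs() through the trace_puts()
 * macro, which picks the cheaper bputs variant for string literals:
 *
 *     trace_puts("reached the slow path\n");
 */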
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 struct tracer *tracer = tr->current_trace;
900 unsigned long flags;
901
902 if (in_nmi()) {
903 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 internal_trace_puts("*** snapshot is being ignored ***\n");
905 return;
906 }
907
908 if (!tr->allocated_snapshot) {
909 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 internal_trace_puts("*** stopping trace here! ***\n");
911 tracing_off();
912 return;
913 }
914
915 /* Note, snapshot can not be used when the tracer uses it */
916 if (tracer->use_max_tr) {
917 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 return;
920 }
921
922 local_irq_save(flags);
923 update_max_tr(tr, current, smp_processor_id());
924 local_irq_restore(flags);
925 }
926
927 /**
928 * trace_snapshot - take a snapshot of the current buffer.
929 *
930 * This causes a swap between the snapshot buffer and the current live
931 * tracing buffer. You can use this to take snapshots of the live
932 * trace when some condition is triggered, but continue to trace.
933 *
934 * Note, make sure to allocate the snapshot with either
935 * a tracing_snapshot_alloc(), or by doing it manually
936 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937 *
938 * If the snapshot buffer is not allocated, it will stop tracing.
939 * Basically making a permanent snapshot.
940 */
941 void tracing_snapshot(void)
942 {
943 struct trace_array *tr = &global_trace;
944
945 tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955 int ret;
956
957 if (!tr->allocated_snapshot) {
958
959 /* allocate spare buffer */
960 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 if (ret < 0)
963 return ret;
964
965 tr->allocated_snapshot = true;
966 }
967
968 return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973 /*
974 * We don't free the ring buffer. Instead, we resize it because the
975 * max_tr ring buffer has some state (e.g. ring->clock) and we want
976 * to preserve it.
977 */
978 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 set_buffer_entries(&tr->max_buffer, 1);
980 tracing_reset_online_cpus(&tr->max_buffer);
981 tr->allocated_snapshot = false;
982 }
983
984 /**
985 * tracing_alloc_snapshot - allocate snapshot buffer.
986 *
987 * This only allocates the snapshot buffer if it isn't already
988 * allocated - it doesn't also take a snapshot.
989 *
990 * This is meant to be used in cases where the snapshot buffer needs
991 * to be set up for events that can't sleep but need to be able to
992 * trigger a snapshot.
993 */
994 int tracing_alloc_snapshot(void)
995 {
996 struct trace_array *tr = &global_trace;
997 int ret;
998
999 ret = alloc_snapshot(tr);
1000 WARN_ON(ret < 0);
1001
1002 return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 *
1009 * This is similar to trace_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep.
1012 *
1013 * This causes a swap between the snapshot buffer and the current live
1014 * tracing buffer. You can use this to take snapshots of the live
1015 * trace when some condition is triggered, but continue to trace.
1016 */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 int ret;
1020
1021 ret = tracing_alloc_snapshot();
1022 if (ret < 0)
1023 return;
1024
1025 tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 /* Give warning */
1043 tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
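
/*
 * Illustrative usage (assumed, not from the original file): a debugging
 * hack in sleepable context can allocate and take a snapshot in one go,
 * or pre-allocate so later snapshots work from contexts that cannot sleep:
 *
 *     tracing_snapshot_alloc();
 *
 * or, split across contexts:
 *
 *     if (tracing_alloc_snapshot() == 0)
 *             ... later, from a context that cannot sleep: tracing_snapshot();
 */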
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 if (tr->trace_buffer.buffer)
1051 ring_buffer_record_off(tr->trace_buffer.buffer);
1052 /*
1053 * This flag is looked at when buffers haven't been allocated
1054 * yet, or by some tracers (like irqsoff), that just want to
1055 * know if the ring buffer has been disabled, but it can handle
1056 * races where it gets disabled but we still do a record.
1057 * As the check is in the fast path of the tracers, it is more
1058 * important to be fast than accurate.
1059 */
1060 tr->buffer_disabled = 1;
1061 /* Make the flag seen by readers */
1062 smp_wmb();
1063 }
1064
1065 /**
1066 * tracing_off - turn off tracing buffers
1067 *
1068 * This function stops the tracing buffers from recording data.
1069 * It does not disable any overhead the tracers themselves may
1070 * be causing. This function simply causes all recording to
1071 * the ring buffers to fail.
1072 */
1073 void tracing_off(void)
1074 {
1075 tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081 if (__disable_trace_on_warning)
1082 tracing_off();
1083 }
1084
1085 /**
1086 * tracer_tracing_is_on - show real state of ring buffer enabled
1087 * @tr : the trace array to know if ring buffer is enabled
1088 *
1089 * Shows real state of the ring buffer if it is enabled or not.
1090 */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 if (tr->trace_buffer.buffer)
1094 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099 * tracing_is_on - show state of ring buffers enabled
1100 */
1101 int tracing_is_on(void)
1102 {
1103 return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
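
/*
 * Illustrative usage (assumed, not from the original file): kernel code
 * can bracket a region of interest so that only that window is recorded:
 *
 *     tracing_on();
 *     ... code path being debugged ...
 *     tracing_off();
 *
 * The same switch is exposed to user space via the "tracing_on" file in
 * tracefs.
 */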
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109 unsigned long buf_size;
1110
1111 if (!str)
1112 return 0;
1113 buf_size = memparse(str, &str);
1114 /* nr_entries can not be zero */
1115 if (buf_size == 0)
1116 return 0;
1117 trace_buf_size = buf_size;
1118 return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 unsigned long threshold;
1125 int ret;
1126
1127 if (!str)
1128 return 0;
1129 ret = kstrtoul(str, 0, &threshold);
1130 if (ret < 0)
1131 return 0;
1132 tracing_thresh = threshold * 1000;
1133 return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 return nsecs / 1000;
1140 }
1141
1142 /*
1143 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146 * of strings in the order that the evals (enum) were defined.
1147 */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 TRACE_FLAGS
1154 NULL
1155 };
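
/*
 * Illustrative expansion (assumed, not from the original file): for a
 * TRACE_FLAGS entry such as C(PRINT_PARENT, "print-parent"), the
 * "#define C(a, b) b" above turns the tuple list into
 * { "print-parent", ..., NULL }, in the same bit order used to build the
 * TRACE_ITER_* enum elsewhere.
 */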
1156
1157 static struct {
1158 u64 (*func)(void);
1159 const char *name;
1160 int in_ns; /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 { trace_clock_local, "local", 1 },
1163 { trace_clock_global, "global", 1 },
1164 { trace_clock_counter, "counter", 0 },
1165 { trace_clock_jiffies, "uptime", 0 },
1166 { trace_clock, "perf", 1 },
1167 { ktime_get_mono_fast_ns, "mono", 1 },
1168 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1169 { ktime_get_boot_fast_ns, "boot", 1 },
1170 ARCH_TRACE_CLOCKS
1171 };
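
/*
 * Illustrative usage (assumed, not from the original file): the clock used
 * for timestamps can be switched at run time through the tracefs
 * "trace_clock" file using one of the names above, e.g.:
 *
 *     echo mono > /sys/kernel/debug/tracing/trace_clock
 */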
1172
1173 /*
1174 * trace_parser_get_init - gets the buffer for trace parser
1175 */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178 memset(parser, 0, sizeof(*parser));
1179
1180 parser->buffer = kmalloc(size, GFP_KERNEL);
1181 if (!parser->buffer)
1182 return 1;
1183
1184 parser->size = size;
1185 return 0;
1186 }
1187
1188 /*
1189 * trace_parser_put - frees the buffer for trace parser
1190 */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193 kfree(parser->buffer);
1194 parser->buffer = NULL;
1195 }
1196
1197 /*
1198 * trace_get_user - reads the user input string separated by space
1199 * (matched by isspace(ch))
1200 *
1201 * For each string found the 'struct trace_parser' is updated,
1202 * and the function returns.
1203 *
1204 * Returns number of bytes read.
1205 *
1206 * See kernel/trace/trace.h for 'struct trace_parser' details.
1207 */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 size_t cnt, loff_t *ppos)
1210 {
1211 char ch;
1212 size_t read = 0;
1213 ssize_t ret;
1214
1215 if (!*ppos)
1216 trace_parser_clear(parser);
1217
1218 ret = get_user(ch, ubuf++);
1219 if (ret)
1220 goto out;
1221
1222 read++;
1223 cnt--;
1224
1225 /*
1226 * The parser is not finished with the last write,
1227 * continue reading the user input without skipping spaces.
1228 */
1229 if (!parser->cont) {
1230 /* skip white space */
1231 while (cnt && isspace(ch)) {
1232 ret = get_user(ch, ubuf++);
1233 if (ret)
1234 goto out;
1235 read++;
1236 cnt--;
1237 }
1238
1239 /* only spaces were written */
1240 if (isspace(ch)) {
1241 *ppos += read;
1242 ret = read;
1243 goto out;
1244 }
1245
1246 parser->idx = 0;
1247 }
1248
1249 /* read the non-space input */
1250 while (cnt && !isspace(ch)) {
1251 if (parser->idx < parser->size - 1)
1252 parser->buffer[parser->idx++] = ch;
1253 else {
1254 ret = -EINVAL;
1255 goto out;
1256 }
1257 ret = get_user(ch, ubuf++);
1258 if (ret)
1259 goto out;
1260 read++;
1261 cnt--;
1262 }
1263
1264 /* We either got finished input or we have to wait for another call. */
1265 if (isspace(ch)) {
1266 parser->buffer[parser->idx] = 0;
1267 parser->cont = false;
1268 } else if (parser->idx < parser->size - 1) {
1269 parser->cont = true;
1270 parser->buffer[parser->idx++] = ch;
1271 } else {
1272 ret = -EINVAL;
1273 goto out;
1274 }
1275
1276 *ppos += read;
1277 ret = read;
1278
1279 out:
1280 return ret;
1281 }
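
/*
 * Illustrative call pattern (see trace_pid_write() above for an in-file
 * user): callers typically loop until the user buffer is drained:
 *
 *     while (cnt > 0) {
 *             pos = 0;
 *             ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *             if (ret < 0 || !trace_parser_loaded(&parser))
 *                     break;
 *             ubuf += ret;
 *             cnt -= ret;
 *             ... consume parser.buffer ...
 *             trace_parser_clear(&parser);
 *     }
 */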
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286 int len;
1287
1288 if (trace_seq_used(s) <= s->seq.readpos)
1289 return -EBUSY;
1290
1291 len = trace_seq_used(s) - s->seq.readpos;
1292 if (cnt > len)
1293 cnt = len;
1294 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296 s->seq.readpos += cnt;
1297 return cnt;
1298 }
1299
1300 unsigned long __read_mostly tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307 */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 struct trace_buffer *max_buf = &tr->max_buffer;
1313 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316 max_buf->cpu = cpu;
1317 max_buf->time_start = data->preempt_timestamp;
1318
1319 max_data->saved_latency = tr->max_latency;
1320 max_data->critical_start = data->critical_start;
1321 max_data->critical_end = data->critical_end;
1322
1323 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 max_data->pid = tsk->pid;
1325 /*
1326 * If tsk == current, then use current_uid(), as that does not use
1327 * RCU. The irq tracer can be called out of RCU scope.
1328 */
1329 if (tsk == current)
1330 max_data->uid = current_uid();
1331 else
1332 max_data->uid = task_uid(tsk);
1333
1334 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 max_data->policy = tsk->policy;
1336 max_data->rt_priority = tsk->rt_priority;
1337
1338 /* record this task's comm */
1339 tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344 * @tr: tracer
1345 * @tsk: the task with the latency
1346 * @cpu: The cpu that initiated the trace.
1347 *
1348 * Flip the buffers between the @tr and the max_tr and record information
1349 * about which task was the cause of this latency.
1350 */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354 struct ring_buffer *buf;
1355
1356 if (tr->stop_count)
1357 return;
1358
1359 WARN_ON_ONCE(!irqs_disabled());
1360
1361 if (!tr->allocated_snapshot) {
1362 /* Only the nop tracer should hit this when disabling */
1363 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364 return;
1365 }
1366
1367 arch_spin_lock(&tr->max_lock);
1368
1369 buf = tr->trace_buffer.buffer;
1370 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371 tr->max_buffer.buffer = buf;
1372
1373 __update_max_tr(tr, tsk, cpu);
1374 arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378 * update_max_tr_single - only copy one trace over, and reset the rest
1379 * @tr - tracer
1380 * @tsk - task with the latency
1381 * @cpu - the cpu of the buffer to copy.
1382 *
1383 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384 */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388 int ret;
1389
1390 if (tr->stop_count)
1391 return;
1392
1393 WARN_ON_ONCE(!irqs_disabled());
1394 if (!tr->allocated_snapshot) {
1395 /* Only the nop tracer should hit this when disabling */
1396 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397 return;
1398 }
1399
1400 arch_spin_lock(&tr->max_lock);
1401
1402 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404 if (ret == -EBUSY) {
1405 /*
1406 * We failed to swap the buffer due to a commit taking
1407 * place on this CPU. We fail to record, but we reset
1408 * the max trace buffer (no one writes directly to it)
1409 * and flag that it failed.
1410 */
1411 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412 "Failed to swap buffers due to commit in progress\n");
1413 }
1414
1415 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417 __update_max_tr(tr, tsk, cpu);
1418 arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424 /* Iterators are static, they should be filled or empty */
1425 if (trace_buffer_iter(iter, iter->cpu_file))
1426 return 0;
1427
1428 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436 struct list_head list;
1437 struct tracer *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444 struct trace_selftests *selftest;
1445
1446 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447 if (!selftest)
1448 return -ENOMEM;
1449
1450 selftest->type = type;
1451 list_add(&selftest->list, &postponed_selftests);
1452 return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457 struct trace_array *tr = &global_trace;
1458 struct tracer *saved_tracer = tr->current_trace;
1459 int ret;
1460
1461 if (!type->selftest || tracing_selftest_disabled)
1462 return 0;
1463
1464 /*
1465 * If a tracer registers early in boot up (before scheduling is
1466 * initialized and such), then do not run its selftests yet.
1467 * Instead, run it a little later in the boot process.
1468 */
1469 if (!selftests_can_run)
1470 return save_selftest(type);
1471
1472 /*
1473 * Run a selftest on this tracer.
1474 * Here we reset the trace buffer, and set the current
1475 * tracer to be this tracer. The tracer can then run some
1476 * internal tracing to verify that everything is in order.
1477 * If we fail, we do not register this tracer.
1478 */
1479 tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481 tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 if (type->use_max_tr) {
1485 /* If we expanded the buffers, make sure the max is expanded too */
1486 if (ring_buffer_expanded)
1487 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488 RING_BUFFER_ALL_CPUS);
1489 tr->allocated_snapshot = true;
1490 }
1491 #endif
1492
1493 /* the test is responsible for initializing and enabling */
1494 pr_info("Testing tracer %s: ", type->name);
1495 ret = type->selftest(type, tr);
1496 /* the test is responsible for resetting too */
1497 tr->current_trace = saved_tracer;
1498 if (ret) {
1499 printk(KERN_CONT "FAILED!\n");
1500 /* Add the warning after printing 'FAILED' */
1501 WARN_ON(1);
1502 return -1;
1503 }
1504 /* Only reset on passing, to avoid touching corrupted buffers */
1505 tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508 if (type->use_max_tr) {
1509 tr->allocated_snapshot = false;
1510
1511 /* Shrink the max buffer again */
1512 if (ring_buffer_expanded)
1513 ring_buffer_resize(tr->max_buffer.buffer, 1,
1514 RING_BUFFER_ALL_CPUS);
1515 }
1516 #endif
1517
1518 printk(KERN_CONT "PASSED\n");
1519 return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524 struct trace_selftests *p, *n;
1525 struct tracer *t, **last;
1526 int ret;
1527
1528 selftests_can_run = true;
1529
1530 mutex_lock(&trace_types_lock);
1531
1532 if (list_empty(&postponed_selftests))
1533 goto out;
1534
1535 pr_info("Running postponed tracer tests:\n");
1536
1537 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538 ret = run_tracer_selftest(p->type);
1539 /* If the test fails, then warn and remove from available_tracers */
1540 if (ret < 0) {
1541 WARN(1, "tracer: %s failed selftest, disabling\n",
1542 p->type->name);
1543 last = &trace_types;
1544 for (t = trace_types; t; t = t->next) {
1545 if (t == p->type) {
1546 *last = t->next;
1547 break;
1548 }
1549 last = &t->next;
1550 }
1551 }
1552 list_del(&p->list);
1553 kfree(p);
1554 }
1555
1556 out:
1557 mutex_unlock(&trace_types_lock);
1558
1559 return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565 return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574 * register_tracer - register a tracer with the ftrace system.
1575 * @type - the plugin for the tracer
1576 *
1577 * Register a new plugin tracer.
1578 */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581 struct tracer *t;
1582 int ret = 0;
1583
1584 if (!type->name) {
1585 pr_info("Tracer must have a name\n");
1586 return -1;
1587 }
1588
1589 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591 return -1;
1592 }
1593
1594 mutex_lock(&trace_types_lock);
1595
1596 tracing_selftest_running = true;
1597
1598 for (t = trace_types; t; t = t->next) {
1599 if (strcmp(type->name, t->name) == 0) {
1600 /* already found */
1601 pr_info("Tracer %s already registered\n",
1602 type->name);
1603 ret = -1;
1604 goto out;
1605 }
1606 }
1607
1608 if (!type->set_flag)
1609 type->set_flag = &dummy_set_flag;
1610 if (!type->flags) {
1611 /* allocate a dummy tracer_flags */
1612 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613 if (!type->flags) {
1614 ret = -ENOMEM;
1615 goto out;
1616 }
1617 type->flags->val = 0;
1618 type->flags->opts = dummy_tracer_opt;
1619 } else
1620 if (!type->flags->opts)
1621 type->flags->opts = dummy_tracer_opt;
1622
1623 /* store the tracer for __set_tracer_option */
1624 type->flags->trace = type;
1625
1626 ret = run_tracer_selftest(type);
1627 if (ret < 0)
1628 goto out;
1629
1630 type->next = trace_types;
1631 trace_types = type;
1632 add_tracer_options(&global_trace, type);
1633
1634 out:
1635 tracing_selftest_running = false;
1636 mutex_unlock(&trace_types_lock);
1637
1638 if (ret || !default_bootup_tracer)
1639 goto out_unlock;
1640
1641 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642 goto out_unlock;
1643
1644 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645 /* Do we want this tracer to start on bootup? */
1646 tracing_set_tracer(&global_trace, type->name);
1647 default_bootup_tracer = NULL;
1648
1649 apply_trace_boot_options();
1650
1651 /* disable other selftests, since this will break it. */
1652 tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655 type->name);
1656 #endif
1657
1658 out_unlock:
1659 return ret;
1660 }
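
/*
 * Illustrative registration sketch (assumed, not from the original file);
 * the field names follow struct tracer in trace.h and the initcall level
 * mirrors the built-in tracers:
 *
 *     static struct tracer example_tracer __read_mostly = {
 *             .name   = "example",
 *             .init   = example_tracer_init,
 *             .reset  = example_tracer_reset,
 *     };
 *
 *     static __init int init_example_tracer(void)
 *     {
 *             return register_tracer(&example_tracer);
 *     }
 *     core_initcall(init_example_tracer);
 */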
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664 struct ring_buffer *buffer = buf->buffer;
1665
1666 if (!buffer)
1667 return;
1668
1669 ring_buffer_record_disable(buffer);
1670
1671 /* Make sure all commits have finished */
1672 synchronize_sched();
1673 ring_buffer_reset_cpu(buffer, cpu);
1674
1675 ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680 struct ring_buffer *buffer = buf->buffer;
1681 int cpu;
1682
1683 if (!buffer)
1684 return;
1685
1686 ring_buffer_record_disable(buffer);
1687
1688 /* Make sure all commits have finished */
1689 synchronize_sched();
1690
1691 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693 for_each_online_cpu(cpu)
1694 ring_buffer_reset_cpu(buffer, cpu);
1695
1696 ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702 struct trace_array *tr;
1703
1704 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709 }
1710 }
1711
1712 static int *tgid_map;
1713
1714 #define SAVED_CMDLINES_DEFAULT 128
1715 #define NO_CMDLINE_MAP UINT_MAX
1716 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1717 struct saved_cmdlines_buffer {
1718 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1719 unsigned *map_cmdline_to_pid;
1720 unsigned cmdline_num;
1721 int cmdline_idx;
1722 char *saved_cmdlines;
1723 };
1724 static struct saved_cmdlines_buffer *savedcmd;
1725
1726 /* temporarily disable recording */
1727 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1728
1729 static inline char *get_saved_cmdlines(int idx)
1730 {
1731 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 }
1733
1734 static inline void set_cmdline(int idx, const char *cmdline)
1735 {
1736 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 }
1738
1739 static int allocate_cmdlines_buffer(unsigned int val,
1740 struct saved_cmdlines_buffer *s)
1741 {
1742 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1743 GFP_KERNEL);
1744 if (!s->map_cmdline_to_pid)
1745 return -ENOMEM;
1746
1747 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1748 if (!s->saved_cmdlines) {
1749 kfree(s->map_cmdline_to_pid);
1750 return -ENOMEM;
1751 }
1752
1753 s->cmdline_idx = 0;
1754 s->cmdline_num = val;
1755 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1756 sizeof(s->map_pid_to_cmdline));
1757 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1758 val * sizeof(*s->map_cmdline_to_pid));
1759
1760 return 0;
1761 }
1762
1763 static int trace_create_savedcmd(void)
1764 {
1765 int ret;
1766
1767 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1768 if (!savedcmd)
1769 return -ENOMEM;
1770
1771 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1772 if (ret < 0) {
1773 kfree(savedcmd);
1774 savedcmd = NULL;
1775 return -ENOMEM;
1776 }
1777
1778 return 0;
1779 }
1780
1781 int is_tracing_stopped(void)
1782 {
1783 return global_trace.stop_count;
1784 }
1785
1786 /**
1787 * tracing_start - quick start of the tracer
1788 *
1789 * If tracing is enabled but was stopped by tracing_stop,
1790 * this will start the tracer back up.
1791 */
1792 void tracing_start(void)
1793 {
1794 struct ring_buffer *buffer;
1795 unsigned long flags;
1796
1797 if (tracing_disabled)
1798 return;
1799
1800 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1801 if (--global_trace.stop_count) {
1802 if (global_trace.stop_count < 0) {
1803 /* Someone screwed up their debugging */
1804 WARN_ON_ONCE(1);
1805 global_trace.stop_count = 0;
1806 }
1807 goto out;
1808 }
1809
1810 /* Prevent the buffers from switching */
1811 arch_spin_lock(&global_trace.max_lock);
1812
1813 buffer = global_trace.trace_buffer.buffer;
1814 if (buffer)
1815 ring_buffer_record_enable(buffer);
1816
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818 buffer = global_trace.max_buffer.buffer;
1819 if (buffer)
1820 ring_buffer_record_enable(buffer);
1821 #endif
1822
1823 arch_spin_unlock(&global_trace.max_lock);
1824
1825 out:
1826 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 }
1828
1829 static void tracing_start_tr(struct trace_array *tr)
1830 {
1831 struct ring_buffer *buffer;
1832 unsigned long flags;
1833
1834 if (tracing_disabled)
1835 return;
1836
1837 /* If global, we need to also start the max tracer */
1838 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1839 return tracing_start();
1840
1841 raw_spin_lock_irqsave(&tr->start_lock, flags);
1842
1843 if (--tr->stop_count) {
1844 if (tr->stop_count < 0) {
1845 /* Someone screwed up their debugging */
1846 WARN_ON_ONCE(1);
1847 tr->stop_count = 0;
1848 }
1849 goto out;
1850 }
1851
1852 buffer = tr->trace_buffer.buffer;
1853 if (buffer)
1854 ring_buffer_record_enable(buffer);
1855
1856 out:
1857 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1858 }
1859
1860 /**
1861 * tracing_stop - quick stop of the tracer
1862 *
1863 * Lightweight way to stop tracing. Use in conjunction with
1864 * tracing_start.
1865 */
1866 void tracing_stop(void)
1867 {
1868 struct ring_buffer *buffer;
1869 unsigned long flags;
1870
1871 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1872 if (global_trace.stop_count++)
1873 goto out;
1874
1875 /* Prevent the buffers from switching */
1876 arch_spin_lock(&global_trace.max_lock);
1877
1878 buffer = global_trace.trace_buffer.buffer;
1879 if (buffer)
1880 ring_buffer_record_disable(buffer);
1881
1882 #ifdef CONFIG_TRACER_MAX_TRACE
1883 buffer = global_trace.max_buffer.buffer;
1884 if (buffer)
1885 ring_buffer_record_disable(buffer);
1886 #endif
1887
1888 arch_spin_unlock(&global_trace.max_lock);
1889
1890 out:
1891 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 }
1893
1894 static void tracing_stop_tr(struct trace_array *tr)
1895 {
1896 struct ring_buffer *buffer;
1897 unsigned long flags;
1898
1899 /* If global, we need to also stop the max tracer */
1900 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1901 return tracing_stop();
1902
1903 raw_spin_lock_irqsave(&tr->start_lock, flags);
1904 if (tr->stop_count++)
1905 goto out;
1906
1907 buffer = tr->trace_buffer.buffer;
1908 if (buffer)
1909 ring_buffer_record_disable(buffer);
1910
1911 out:
1912 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 }
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917 unsigned pid, idx;
1918
1919 /* treat recording of idle task as a success */
1920 if (!tsk->pid)
1921 return 1;
1922
1923 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1924 return 0;
1925
1926 /*
1927 * It's not the end of the world if we don't get
1928 * the lock, but we also don't want to spin
1929 * nor do we want to disable interrupts,
1930 * so if we miss here, then better luck next time.
1931 */
1932 if (!arch_spin_trylock(&trace_cmdline_lock))
1933 return 0;
1934
1935 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1936 if (idx == NO_CMDLINE_MAP) {
1937 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1938
1939 /*
1940 * Check whether the cmdline buffer at idx has a pid
1941 * mapped. We are going to overwrite that entry so we
1942 * need to clear the map_pid_to_cmdline. Otherwise we
1943 * would read the new comm for the old pid.
1944 */
1945 pid = savedcmd->map_cmdline_to_pid[idx];
1946 if (pid != NO_CMDLINE_MAP)
1947 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1948
1949 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1950 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1951
1952 savedcmd->cmdline_idx = idx;
1953 }
1954
1955 set_cmdline(idx, tsk->comm);
1956
1957 arch_spin_unlock(&trace_cmdline_lock);
1958
1959 return 1;
1960 }
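
/*
 * Illustrative worked example (assumed values, not from the original file):
 * if pid 1234 running "bash" is saved into slot 5 of the cmdline cache:
 *
 *     savedcmd->map_pid_to_cmdline[1234] == 5
 *     savedcmd->map_cmdline_to_pid[5]    == 1234
 *     get_saved_cmdlines(5)              == "bash"
 *
 * __trace_find_cmdline() below simply walks this mapping in reverse.
 */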
1961
1962 static void __trace_find_cmdline(int pid, char comm[])
1963 {
1964 unsigned map;
1965
1966 if (!pid) {
1967 strcpy(comm, "<idle>");
1968 return;
1969 }
1970
1971 if (WARN_ON_ONCE(pid < 0)) {
1972 strcpy(comm, "<XXX>");
1973 return;
1974 }
1975
1976 if (pid > PID_MAX_DEFAULT) {
1977 strcpy(comm, "<...>");
1978 return;
1979 }
1980
1981 map = savedcmd->map_pid_to_cmdline[pid];
1982 if (map != NO_CMDLINE_MAP)
1983 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1984 else
1985 strcpy(comm, "<...>");
1986 }
1987
1988 void trace_find_cmdline(int pid, char comm[])
1989 {
1990 preempt_disable();
1991 arch_spin_lock(&trace_cmdline_lock);
1992
1993 __trace_find_cmdline(pid, comm);
1994
1995 arch_spin_unlock(&trace_cmdline_lock);
1996 preempt_enable();
1997 }
1998
1999 int trace_find_tgid(int pid)
2000 {
2001 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2002 return 0;
2003
2004 return tgid_map[pid];
2005 }
2006
2007 static int trace_save_tgid(struct task_struct *tsk)
2008 {
2009 /* treat recording of idle task as a success */
2010 if (!tsk->pid)
2011 return 1;
2012
2013 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2014 return 0;
2015
2016 tgid_map[tsk->pid] = tsk->tgid;
2017 return 1;
2018 }
2019
2020 static bool tracing_record_taskinfo_skip(int flags)
2021 {
2022 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2023 return true;
2024 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2025 return true;
2026 if (!__this_cpu_read(trace_taskinfo_save))
2027 return true;
2028 return false;
2029 }
2030
2031 /**
2032 * tracing_record_taskinfo - record the task info of a task
2033 *
2034 * @task: task to record
2035 * @flags: TRACE_RECORD_CMDLINE for recording comm
2036 *         TRACE_RECORD_TGID for recording tgid
2037 */
2038 void tracing_record_taskinfo(struct task_struct *task, int flags)
2039 {
2040 bool done;
2041
2042 if (tracing_record_taskinfo_skip(flags))
2043 return;
2044
2045 /*
2046 * Record as much task information as possible. If some fail, continue
2047 * to try to record the others.
2048 */
2049 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2050 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2051
2052 /* If recording any information failed, retry again soon. */
2053 if (!done)
2054 return;
2055
2056 __this_cpu_write(trace_taskinfo_save, false);
2057 }
2058
2059 /**
2060 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2061 *
2062 * @prev: previous task during sched_switch
2063 * @next: next task during sched_switch
2064 * @flags: TRACE_RECORD_CMDLINE for recording comm
2065 * TRACE_RECORD_TGID for recording tgid
2066 */
2067 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2068 struct task_struct *next, int flags)
2069 {
2070 bool done;
2071
2072 if (tracing_record_taskinfo_skip(flags))
2073 return;
2074
2075 /*
2076 * Record as much task information as possible. If some fail, continue
2077 * to try to record the others.
2078 */
2079 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2080 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2081 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2082 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2083
2084 /* If recording any information failed, retry again soon. */
2085 if (!done)
2086 return;
2087
2088 __this_cpu_write(trace_taskinfo_save, false);
2089 }
2090
2091 /* Helpers to record a specific task information */
2092 void tracing_record_cmdline(struct task_struct *task)
2093 {
2094 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2095 }
2096
2097 void tracing_record_tgid(struct task_struct *task)
2098 {
2099 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2100 }
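
/*
 * Usage sketch (illustrative): a sched_switch probe records task info for
 * both tasks according to which columns the user enabled.  The flag choice
 * here is an assumption; the real probe (in trace_sched_switch.c) derives it
 * from its own reference counts.
 */
static void my_sched_switch_probe(void *data, bool preempt,
				  struct task_struct *prev,
				  struct task_struct *next)
{
	int flags = TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID;

	tracing_record_taskinfo_sched_switch(prev, next, flags);
}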
2101
2102 /*
2103 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2104 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2105 * simplifies those functions and keeps them in sync.
2106 */
2107 enum print_line_t trace_handle_return(struct trace_seq *s)
2108 {
2109 return trace_seq_has_overflowed(s) ?
2110 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2111 }
2112 EXPORT_SYMBOL_GPL(trace_handle_return);
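
/*
 * Usage sketch (illustrative): an event's output callback writes into
 * iter->seq and lets trace_handle_return() collapse the overflow check.
 * my_event_trace() is a hypothetical trace_event_functions::trace method.
 */
static enum print_line_t my_event_trace(struct trace_iterator *iter, int flags,
					struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "my event on cpu %d\n", iter->cpu);

	return trace_handle_return(&iter->seq);
}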
2113
2114 void
2115 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2116 int pc)
2117 {
2118 struct task_struct *tsk = current;
2119
2120 entry->preempt_count = pc & 0xff;
2121 entry->pid = (tsk) ? tsk->pid : 0;
2122 entry->flags =
2123 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2124 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2125 #else
2126 TRACE_FLAG_IRQS_NOSUPPORT |
2127 #endif
2128 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2129 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2130 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2131 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2132 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2133 }
2134 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
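
/*
 * Usage sketch (illustrative): a writer that has reserved a ring buffer event
 * fills the common trace_entry header from the current context before its own
 * payload.  TRACE_FN stands in for whatever type the caller actually writes.
 */
static void my_fill_header(struct ring_buffer_event *event,
			   unsigned long irq_flags, int pc)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, irq_flags, pc);
	ent->type = TRACE_FN;	/* hypothetical choice of event type */
}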
2135
2136 struct ring_buffer_event *
2137 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2138 int type,
2139 unsigned long len,
2140 unsigned long flags, int pc)
2141 {
2142 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2143 }
2144
2145 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2146 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2147 static int trace_buffered_event_ref;
2148
2149 /**
2150 * trace_buffered_event_enable - enable buffering events
2151 *
2152 * When events are being filtered, it is quicker to use a temporary
2153 * buffer to write the event data into if there's a likely chance
2154 * that it will not be committed. Discarding from the ring buffer
2155 * is not as fast as committing, and is much slower than copying
2156 * the data into a temporary buffer and committing only when needed.
2157 *
2158 * When an event is to be filtered, allocate per-CPU buffers to
2159 * write the event data into; if the event is filtered and discarded
2160 * it is simply dropped, otherwise the entire data is committed
2161 * in one shot.
2162 */
2163 void trace_buffered_event_enable(void)
2164 {
2165 struct ring_buffer_event *event;
2166 struct page *page;
2167 int cpu;
2168
2169 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2170
2171 if (trace_buffered_event_ref++)
2172 return;
2173
2174 for_each_tracing_cpu(cpu) {
2175 page = alloc_pages_node(cpu_to_node(cpu),
2176 GFP_KERNEL | __GFP_NORETRY, 0);
2177 if (!page)
2178 goto failed;
2179
2180 event = page_address(page);
2181 memset(event, 0, sizeof(*event));
2182
2183 per_cpu(trace_buffered_event, cpu) = event;
2184
2185 preempt_disable();
2186 if (cpu == smp_processor_id() &&
2187 this_cpu_read(trace_buffered_event) !=
2188 per_cpu(trace_buffered_event, cpu))
2189 WARN_ON_ONCE(1);
2190 preempt_enable();
2191 }
2192
2193 return;
2194 failed:
2195 trace_buffered_event_disable();
2196 }
2197
2198 static void enable_trace_buffered_event(void *data)
2199 {
2200 /* Probably not needed, but do it anyway */
2201 smp_rmb();
2202 this_cpu_dec(trace_buffered_event_cnt);
2203 }
2204
2205 static void disable_trace_buffered_event(void *data)
2206 {
2207 this_cpu_inc(trace_buffered_event_cnt);
2208 }
2209
2210 /**
2211 * trace_buffered_event_disable - disable buffering events
2212 *
2213 * When a filter is removed, it is faster to not use the buffered
2214 * events, and to commit directly into the ring buffer. Free up
2215 * the temp buffers when there are no more users. This requires
2216 * special synchronization with current events.
2217 */
2218 void trace_buffered_event_disable(void)
2219 {
2220 int cpu;
2221
2222 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2223
2224 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2225 return;
2226
2227 if (--trace_buffered_event_ref)
2228 return;
2229
2230 preempt_disable();
2231 /* For each CPU, mark the buffered event as in use so writers skip it. */
2232 smp_call_function_many(tracing_buffer_mask,
2233 disable_trace_buffered_event, NULL, 1);
2234 preempt_enable();
2235
2236 /* Wait for all current users to finish */
2237 synchronize_sched();
2238
2239 for_each_tracing_cpu(cpu) {
2240 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2241 per_cpu(trace_buffered_event, cpu) = NULL;
2242 }
2243 /*
2244 * Make sure trace_buffered_event is NULL before clearing
2245 * trace_buffered_event_cnt.
2246 */
2247 smp_wmb();
2248
2249 preempt_disable();
2250 /* On each CPU, drop the count we added above */
2251 smp_call_function_many(tracing_buffer_mask,
2252 enable_trace_buffered_event, NULL, 1);
2253 preempt_enable();
2254 }
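
/*
 * Usage sketch (illustrative): callers hold event_mutex and pair the
 * enable/disable so the per-CPU staging pages are reference counted, e.g.
 * when an event filter is attached and later removed.  The filter steps are
 * only placeholders.
 */
static void my_attach_filter(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* allocate per-CPU staging pages */
	/* ... install the filter ... */
	mutex_unlock(&event_mutex);
}

static void my_detach_filter(void)
{
	mutex_lock(&event_mutex);
	/* ... remove the filter ... */
	trace_buffered_event_disable();	/* freed once the last user is gone */
	mutex_unlock(&event_mutex);
}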
2255
2256 static struct ring_buffer *temp_buffer;
2257
2258 struct ring_buffer_event *
2259 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2260 struct trace_event_file *trace_file,
2261 int type, unsigned long len,
2262 unsigned long flags, int pc)
2263 {
2264 struct ring_buffer_event *entry;
2265 int val;
2266
2267 *current_rb = trace_file->tr->trace_buffer.buffer;
2268
2269 if ((trace_file->flags &
2270 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2271 (entry = this_cpu_read(trace_buffered_event))) {
2272 /* Try to use the per cpu buffer first */
2273 val = this_cpu_inc_return(trace_buffered_event_cnt);
2274 if (val == 1) {
2275 trace_event_setup(entry, type, flags, pc);
2276 entry->array[0] = len;
2277 return entry;
2278 }
2279 this_cpu_dec(trace_buffered_event_cnt);
2280 }
2281
2282 entry = __trace_buffer_lock_reserve(*current_rb,
2283 type, len, flags, pc);
2284 /*
2285 * If tracing is off, but we have triggers enabled
2286 * we still need to look at the event data. Use the temp_buffer
2287 * to store the trace event for the trigger to use. It's recursion
2288 * safe and will not be recorded anywhere.
2289 */
2290 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2291 *current_rb = temp_buffer;
2292 entry = __trace_buffer_lock_reserve(*current_rb,
2293 type, len, flags, pc);
2294 }
2295 return entry;
2296 }
2297 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2298
2299 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2300 static DEFINE_MUTEX(tracepoint_printk_mutex);
2301
2302 static void output_printk(struct trace_event_buffer *fbuffer)
2303 {
2304 struct trace_event_call *event_call;
2305 struct trace_event *event;
2306 unsigned long flags;
2307 struct trace_iterator *iter = tracepoint_print_iter;
2308
2309 /* We should never get here if iter is NULL */
2310 if (WARN_ON_ONCE(!iter))
2311 return;
2312
2313 event_call = fbuffer->trace_file->event_call;
2314 if (!event_call || !event_call->event.funcs ||
2315 !event_call->event.funcs->trace)
2316 return;
2317
2318 event = &fbuffer->trace_file->event_call->event;
2319
2320 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2321 trace_seq_init(&iter->seq);
2322 iter->ent = fbuffer->entry;
2323 event_call->event.funcs->trace(iter, 0, event);
2324 trace_seq_putc(&iter->seq, 0);
2325 printk("%s", iter->seq.buffer);
2326
2327 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2328 }
2329
2330 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2331 void __user *buffer, size_t *lenp,
2332 loff_t *ppos)
2333 {
2334 int save_tracepoint_printk;
2335 int ret;
2336
2337 mutex_lock(&tracepoint_printk_mutex);
2338 save_tracepoint_printk = tracepoint_printk;
2339
2340 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2341
2342 /*
2343 * This will force exiting early, as tracepoint_printk
2344 * is always zero when tracepoint_print_iter is not allocated
2345 */
2346 if (!tracepoint_print_iter)
2347 tracepoint_printk = 0;
2348
2349 if (save_tracepoint_printk == tracepoint_printk)
2350 goto out;
2351
2352 if (tracepoint_printk)
2353 static_key_enable(&tracepoint_printk_key.key);
2354 else
2355 static_key_disable(&tracepoint_printk_key.key);
2356
2357 out:
2358 mutex_unlock(&tracepoint_printk_mutex);
2359
2360 return ret;
2361 }
2362
2363 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2364 {
2365 if (static_key_false(&tracepoint_printk_key.key))
2366 output_printk(fbuffer);
2367
2368 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2369 fbuffer->event, fbuffer->entry,
2370 fbuffer->flags, fbuffer->pc);
2371 }
2372 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2373
2374 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2375 struct ring_buffer *buffer,
2376 struct ring_buffer_event *event,
2377 unsigned long flags, int pc,
2378 struct pt_regs *regs)
2379 {
2380 __buffer_unlock_commit(buffer, event);
2381
2382 /*
2383 * If regs is not set, then skip the following callers:
2384 * trace_buffer_unlock_commit_regs
2385 * event_trigger_unlock_commit
2386 * trace_event_buffer_commit
2387 * trace_event_raw_event_sched_switch
2388 * Note, we can still get here via blktrace, wakeup tracer
2389 * and mmiotrace, but that's ok if they lose a function or
2390 * two. They are not that meaningful.
2391 */
2392 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2393 ftrace_trace_userstack(buffer, flags, pc);
2394 }
2395
2396 /*
2397 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2398 */
2399 void
2400 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2401 struct ring_buffer_event *event)
2402 {
2403 __buffer_unlock_commit(buffer, event);
2404 }
2405
2406 static void
2407 trace_process_export(struct trace_export *export,
2408 struct ring_buffer_event *event)
2409 {
2410 struct trace_entry *entry;
2411 unsigned int size = 0;
2412
2413 entry = ring_buffer_event_data(event);
2414 size = ring_buffer_event_length(event);
2415 export->write(entry, size);
2416 }
2417
2418 static DEFINE_MUTEX(ftrace_export_lock);
2419
2420 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2421
2422 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2423
2424 static inline void ftrace_exports_enable(void)
2425 {
2426 static_branch_enable(&ftrace_exports_enabled);
2427 }
2428
2429 static inline void ftrace_exports_disable(void)
2430 {
2431 static_branch_disable(&ftrace_exports_enabled);
2432 }
2433
2434 void ftrace_exports(struct ring_buffer_event *event)
2435 {
2436 struct trace_export *export;
2437
2438 preempt_disable_notrace();
2439
2440 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2441 while (export) {
2442 trace_process_export(export, event);
2443 export = rcu_dereference_raw_notrace(export->next);
2444 }
2445
2446 preempt_enable_notrace();
2447 }
2448
2449 static inline void
2450 add_trace_export(struct trace_export **list, struct trace_export *export)
2451 {
2452 rcu_assign_pointer(export->next, *list);
2453 /*
2454 * We are adding export to the list, but another
2455 * CPU might be walking that list. We need to make sure
2456 * the export->next pointer is valid before another CPU sees
2457 * the export pointer included in the list.
2458 */
2459 rcu_assign_pointer(*list, export);
2460 }
2461
2462 static inline int
2463 rm_trace_export(struct trace_export **list, struct trace_export *export)
2464 {
2465 struct trace_export **p;
2466
2467 for (p = list; *p != NULL; p = &(*p)->next)
2468 if (*p == export)
2469 break;
2470
2471 if (*p != export)
2472 return -1;
2473
2474 rcu_assign_pointer(*p, (*p)->next);
2475
2476 return 0;
2477 }
2478
2479 static inline void
2480 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2481 {
2482 if (*list == NULL)
2483 ftrace_exports_enable();
2484
2485 add_trace_export(list, export);
2486 }
2487
2488 static inline int
2489 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2490 {
2491 int ret;
2492
2493 ret = rm_trace_export(list, export);
2494 if (*list == NULL)
2495 ftrace_exports_disable();
2496
2497 return ret;
2498 }
2499
2500 int register_ftrace_export(struct trace_export *export)
2501 {
2502 if (WARN_ON_ONCE(!export->write))
2503 return -1;
2504
2505 mutex_lock(&ftrace_export_lock);
2506
2507 add_ftrace_export(&ftrace_exports_list, export);
2508
2509 mutex_unlock(&ftrace_export_lock);
2510
2511 return 0;
2512 }
2513 EXPORT_SYMBOL_GPL(register_ftrace_export);
2514
2515 int unregister_ftrace_export(struct trace_export *export)
2516 {
2517 int ret;
2518
2519 mutex_lock(&ftrace_export_lock);
2520
2521 ret = rm_ftrace_export(&ftrace_exports_list, export);
2522
2523 mutex_unlock(&ftrace_export_lock);
2524
2525 return ret;
2526 }
2527 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
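
/*
 * Usage sketch (illustrative): a module that ships every function-trace entry
 * to some external transport.  The write() prototype follows <linux/trace.h>
 * in this tree (const void *buf, unsigned int len); treat the exact signature
 * as an assumption when building elsewhere.
 */
static void my_export_write(const void *buf, unsigned int len)
{
	/* hypothetical: push @len bytes of trace entry data to a transport */
}

static struct trace_export my_export = {
	.write	= my_export_write,
};

static int __init my_export_init(void)
{
	return register_ftrace_export(&my_export);
}

static void __exit my_export_exit(void)
{
	unregister_ftrace_export(&my_export);
}

module_init(my_export_init);
module_exit(my_export_exit);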
2528
2529 void
2530 trace_function(struct trace_array *tr,
2531 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2532 int pc)
2533 {
2534 struct trace_event_call *call = &event_function;
2535 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2536 struct ring_buffer_event *event;
2537 struct ftrace_entry *entry;
2538
2539 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2540 flags, pc);
2541 if (!event)
2542 return;
2543 entry = ring_buffer_event_data(event);
2544 entry->ip = ip;
2545 entry->parent_ip = parent_ip;
2546
2547 if (!call_filter_check_discard(call, entry, buffer, event)) {
2548 if (static_branch_unlikely(&ftrace_exports_enabled))
2549 ftrace_exports(event);
2550 __buffer_unlock_commit(buffer, event);
2551 }
2552 }
2553
2554 #ifdef CONFIG_STACKTRACE
2555
2556 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2557 struct ftrace_stack {
2558 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2559 };
2560
2561 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2562 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2563
2564 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2565 unsigned long flags,
2566 int skip, int pc, struct pt_regs *regs)
2567 {
2568 struct trace_event_call *call = &event_kernel_stack;
2569 struct ring_buffer_event *event;
2570 struct stack_entry *entry;
2571 struct stack_trace trace;
2572 int use_stack;
2573 int size = FTRACE_STACK_ENTRIES;
2574
2575 trace.nr_entries = 0;
2576 trace.skip = skip;
2577
2578 /*
2579 * Add two, for this function and the call to save_stack_trace()
2580 * If regs is set, then these functions will not be in the way.
2581 */
2582 if (!regs)
2583 trace.skip += 2;
2584
2585 /*
2586 * Since events can happen in NMIs there's no safe way to
2587 * use the per-CPU ftrace_stack. We reserve it, and if an interrupt
2588 * or NMI comes in, it will just have to fall back to the default
2589 * number of entries reserved in the event itself (FTRACE_STACK_ENTRIES).
2590 */
2591 preempt_disable_notrace();
2592
2593 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2594 /*
2595 * We don't need any atomic variables, just a barrier.
2596 * If an interrupt comes in, we don't care, because it would
2597 * have exited and put the counter back to what we want.
2598 * We just need a barrier to keep gcc from moving things
2599 * around.
2600 */
2601 barrier();
2602 if (use_stack == 1) {
2603 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2604 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2605
2606 if (regs)
2607 save_stack_trace_regs(regs, &trace);
2608 else
2609 save_stack_trace(&trace);
2610
2611 if (trace.nr_entries > size)
2612 size = trace.nr_entries;
2613 } else
2614 /* From now on, use_stack is a boolean */
2615 use_stack = 0;
2616
2617 size *= sizeof(unsigned long);
2618
2619 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2620 sizeof(*entry) + size, flags, pc);
2621 if (!event)
2622 goto out;
2623 entry = ring_buffer_event_data(event);
2624
2625 memset(&entry->caller, 0, size);
2626
2627 if (use_stack)
2628 memcpy(&entry->caller, trace.entries,
2629 trace.nr_entries * sizeof(unsigned long));
2630 else {
2631 trace.max_entries = FTRACE_STACK_ENTRIES;
2632 trace.entries = entry->caller;
2633 if (regs)
2634 save_stack_trace_regs(regs, &trace);
2635 else
2636 save_stack_trace(&trace);
2637 }
2638
2639 entry->size = trace.nr_entries;
2640
2641 if (!call_filter_check_discard(call, entry, buffer, event))
2642 __buffer_unlock_commit(buffer, event);
2643
2644 out:
2645 /* Again, don't let gcc optimize things here */
2646 barrier();
2647 __this_cpu_dec(ftrace_stack_reserve);
2648 preempt_enable_notrace();
2649
2650 }
2651
2652 static inline void ftrace_trace_stack(struct trace_array *tr,
2653 struct ring_buffer *buffer,
2654 unsigned long flags,
2655 int skip, int pc, struct pt_regs *regs)
2656 {
2657 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2658 return;
2659
2660 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2661 }
2662
2663 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2664 int pc)
2665 {
2666 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2667
2668 if (rcu_is_watching()) {
2669 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2670 return;
2671 }
2672
2673 /*
2674 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2675 * but if the above rcu_is_watching() failed, then the NMI
2676 * triggered someplace critical, and rcu_irq_enter() should
2677 * not be called from NMI.
2678 */
2679 if (unlikely(in_nmi()))
2680 return;
2681
2682 /*
2683 * It is possible that a function is being traced in a
2684 * location that RCU is not watching. A call to
2685 * rcu_irq_enter() will make sure that it is, but there are
2686 * a few internal RCU functions that could be traced
2687 * where that won't work either. In those cases, we just
2688 * do nothing.
2689 */
2690 if (unlikely(rcu_irq_enter_disabled()))
2691 return;
2692
2693 rcu_irq_enter_irqson();
2694 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2695 rcu_irq_exit_irqson();
2696 }
2697
2698 /**
2699 * trace_dump_stack - record a stack back trace in the trace buffer
2700 * @skip: Number of functions to skip (helper handlers)
2701 */
2702 void trace_dump_stack(int skip)
2703 {
2704 unsigned long flags;
2705
2706 if (tracing_disabled || tracing_selftest_running)
2707 return;
2708
2709 local_save_flags(flags);
2710
2711 /*
2712 * Skip 3 more frames; that seems to get us to the caller of
2713 * this function.
2714 */
2715 skip += 3;
2716 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2717 flags, skip, preempt_count(), NULL);
2718 }
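
/*
 * Usage sketch (illustrative): drop a backtrace into the trace buffer at an
 * interesting point; it shows up later when reading the "trace" file.  The
 * condition is hypothetical.
 */
static void my_debug_checkpoint(bool went_sideways)
{
	if (unlikely(went_sideways))
		trace_dump_stack(0);	/* 0: don't skip any extra caller frames */
}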
2719
2720 static DEFINE_PER_CPU(int, user_stack_count);
2721
2722 void
2723 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2724 {
2725 struct trace_event_call *call = &event_user_stack;
2726 struct ring_buffer_event *event;
2727 struct userstack_entry *entry;
2728 struct stack_trace trace;
2729
2730 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2731 return;
2732
2733 /*
2734 * NMIs cannot handle page faults, even with fixups.
2735 * Saving the user stack can (and often does) fault.
2736 */
2737 if (unlikely(in_nmi()))
2738 return;
2739
2740 /*
2741 * Prevent recursion, since user stack tracing may
2742 * trigger other kernel events.
2743 */
2744 preempt_disable();
2745 if (__this_cpu_read(user_stack_count))
2746 goto out;
2747
2748 __this_cpu_inc(user_stack_count);
2749
2750 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2751 sizeof(*entry), flags, pc);
2752 if (!event)
2753 goto out_drop_count;
2754 entry = ring_buffer_event_data(event);
2755
2756 entry->tgid = current->tgid;
2757 memset(&entry->caller, 0, sizeof(entry->caller));
2758
2759 trace.nr_entries = 0;
2760 trace.max_entries = FTRACE_STACK_ENTRIES;
2761 trace.skip = 0;
2762 trace.entries = entry->caller;
2763
2764 save_stack_trace_user(&trace);
2765 if (!call_filter_check_discard(call, entry, buffer, event))
2766 __buffer_unlock_commit(buffer, event);
2767
2768 out_drop_count:
2769 __this_cpu_dec(user_stack_count);
2770 out:
2771 preempt_enable();
2772 }
2773
2774 #ifdef UNUSED
2775 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2776 {
2777 ftrace_trace_userstack(tr, flags, preempt_count());
2778 }
2779 #endif /* UNUSED */
2780
2781 #endif /* CONFIG_STACKTRACE */
2782
2783 /* created for use with alloc_percpu */
2784 struct trace_buffer_struct {
2785 int nesting;
2786 char buffer[4][TRACE_BUF_SIZE];
2787 };
2788
2789 static struct trace_buffer_struct *trace_percpu_buffer;
2790
2791 /*
2792 * This allows for lockless recording. If we're nested too deeply, then
2793 * this returns NULL.
2794 */
2795 static char *get_trace_buf(void)
2796 {
2797 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2798
2799 if (!buffer || buffer->nesting >= 4)
2800 return NULL;
2801
2802 buffer->nesting++;
2803
2804 /* Interrupts must see nesting incremented before we use the buffer */
2805 barrier();
2806 return &buffer->buffer[buffer->nesting][0];
2807 }
2808
2809 static void put_trace_buf(void)
2810 {
2811 /* Don't let the decrement of nesting leak before this */
2812 barrier();
2813 this_cpu_dec(trace_percpu_buffer->nesting);
2814 }
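
/*
 * Usage sketch (illustrative): the per-CPU scratch buffer is always used in a
 * get/put bracket with preemption disabled, exactly as trace_vbprintk() below
 * does.  my_format_something() is a hypothetical helper; committing the
 * formatted data into the ring buffer is elided.
 */
static int my_format_something(const char *fmt, va_list args)
{
	char *tbuffer;
	int len = 0;

	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (!tbuffer)
		goto out_nobuffer;

	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
	/* ... copy tbuffer into a reserved ring buffer event here ... */

	put_trace_buf();

out_nobuffer:
	preempt_enable_notrace();
	return len;
}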
2815
2816 static int alloc_percpu_trace_buffer(void)
2817 {
2818 struct trace_buffer_struct *buffers;
2819
2820 buffers = alloc_percpu(struct trace_buffer_struct);
2821 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2822 return -ENOMEM;
2823
2824 trace_percpu_buffer = buffers;
2825 return 0;
2826 }
2827
2828 static int buffers_allocated;
2829
2830 void trace_printk_init_buffers(void)
2831 {
2832 if (buffers_allocated)
2833 return;
2834
2835 if (alloc_percpu_trace_buffer())
2836 return;
2837
2838 /* trace_printk() is for debug use only. Don't use it in production. */
2839
2840 pr_warn("\n");
2841 pr_warn("**********************************************************\n");
2842 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2843 pr_warn("** **\n");
2844 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2845 pr_warn("** **\n");
2846 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2847 pr_warn("** unsafe for production use. **\n");
2848 pr_warn("** **\n");
2849 pr_warn("** If you see this message and you are not debugging **\n");
2850 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2851 pr_warn("** **\n");
2852 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2853 pr_warn("**********************************************************\n");
2854
2855 /* Expand the buffers to set size */
2856 tracing_update_buffers();
2857
2858 buffers_allocated = 1;
2859
2860 /*
2861 * trace_printk_init_buffers() can be called by modules.
2862 * If that happens, then we need to start cmdline recording
2863 * directly here. If the global_trace.buffer is already
2864 * allocated here, then this was called by module code.
2865 */
2866 if (global_trace.trace_buffer.buffer)
2867 tracing_start_cmdline_record();
2868 }
2869
2870 void trace_printk_start_comm(void)
2871 {
2872 /* Start tracing comms if trace printk is set */
2873 if (!buffers_allocated)
2874 return;
2875 tracing_start_cmdline_record();
2876 }
2877
2878 static void trace_printk_start_stop_comm(int enabled)
2879 {
2880 if (!buffers_allocated)
2881 return;
2882
2883 if (enabled)
2884 tracing_start_cmdline_record();
2885 else
2886 tracing_stop_cmdline_record();
2887 }
2888
2889 /**
2890 * trace_vbprintk - write binary msg to tracing buffer
2891 * @ip: address of the caller; @fmt: binary format string; @args: arguments for @fmt
2892 */
2893 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2894 {
2895 struct trace_event_call *call = &event_bprint;
2896 struct ring_buffer_event *event;
2897 struct ring_buffer *buffer;
2898 struct trace_array *tr = &global_trace;
2899 struct bprint_entry *entry;
2900 unsigned long flags;
2901 char *tbuffer;
2902 int len = 0, size, pc;
2903
2904 if (unlikely(tracing_selftest_running || tracing_disabled))
2905 return 0;
2906
2907 /* Don't pollute graph traces with trace_vprintk internals */
2908 pause_graph_tracing();
2909
2910 pc = preempt_count();
2911 preempt_disable_notrace();
2912
2913 tbuffer = get_trace_buf();
2914 if (!tbuffer) {
2915 len = 0;
2916 goto out_nobuffer;
2917 }
2918
2919 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2920
2921 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2922 goto out;
2923
2924 local_save_flags(flags);
2925 size = sizeof(*entry) + sizeof(u32) * len;
2926 buffer = tr->trace_buffer.buffer;
2927 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2928 flags, pc);
2929 if (!event)
2930 goto out;
2931 entry = ring_buffer_event_data(event);
2932 entry->ip = ip;
2933 entry->fmt = fmt;
2934
2935 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2936 if (!call_filter_check_discard(call, entry, buffer, event)) {
2937 __buffer_unlock_commit(buffer, event);
2938 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2939 }
2940
2941 out:
2942 put_trace_buf();
2943
2944 out_nobuffer:
2945 preempt_enable_notrace();
2946 unpause_graph_tracing();
2947
2948 return len;
2949 }
2950 EXPORT_SYMBOL_GPL(trace_vbprintk);
2951
2952 static int
2953 __trace_array_vprintk(struct ring_buffer *buffer,
2954 unsigned long ip, const char *fmt, va_list args)
2955 {
2956 struct trace_event_call *call = &event_print;
2957 struct ring_buffer_event *event;
2958 int len = 0, size, pc;
2959 struct print_entry *entry;
2960 unsigned long flags;
2961 char *tbuffer;
2962
2963 if (tracing_disabled || tracing_selftest_running)
2964 return 0;
2965
2966 /* Don't pollute graph traces with trace_vprintk internals */
2967 pause_graph_tracing();
2968
2969 pc = preempt_count();
2970 preempt_disable_notrace();
2971
2972
2973 tbuffer = get_trace_buf();
2974 if (!tbuffer) {
2975 len = 0;
2976 goto out_nobuffer;
2977 }
2978
2979 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2980
2981 local_save_flags(flags);
2982 size = sizeof(*entry) + len + 1;
2983 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2984 flags, pc);
2985 if (!event)
2986 goto out;
2987 entry = ring_buffer_event_data(event);
2988 entry->ip = ip;
2989
2990 memcpy(&entry->buf, tbuffer, len + 1);
2991 if (!call_filter_check_discard(call, entry, buffer, event)) {
2992 __buffer_unlock_commit(buffer, event);
2993 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2994 }
2995
2996 out:
2997 put_trace_buf();
2998
2999 out_nobuffer:
3000 preempt_enable_notrace();
3001 unpause_graph_tracing();
3002
3003 return len;
3004 }
3005
3006 int trace_array_vprintk(struct trace_array *tr,
3007 unsigned long ip, const char *fmt, va_list args)
3008 {
3009 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3010 }
3011
3012 int trace_array_printk(struct trace_array *tr,
3013 unsigned long ip, const char *fmt, ...)
3014 {
3015 int ret;
3016 va_list ap;
3017
3018 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3019 return 0;
3020
3021 va_start(ap, fmt);
3022 ret = trace_array_vprintk(tr, ip, fmt, ap);
3023 va_end(ap);
3024 return ret;
3025 }
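
/*
 * Usage sketch (illustrative): write a printk-style message into a specific
 * trace_array (e.g. an instance buffer) rather than the global one.  @my_tr
 * is a hypothetical trace_array pointer obtained elsewhere; note the message
 * is dropped unless the global "printk" trace option is set.
 */
static void my_log_reset(struct trace_array *my_tr, int id, int err)
{
	trace_array_printk(my_tr, _THIS_IP_, "widget %d reset (err=%d)\n",
			   id, err);
}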
3026
3027 int trace_array_printk_buf(struct ring_buffer *buffer,
3028 unsigned long ip, const char *fmt, ...)
3029 {
3030 int ret;
3031 va_list ap;
3032
3033 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3034 return 0;
3035
3036 va_start(ap, fmt);
3037 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3038 va_end(ap);
3039 return ret;
3040 }
3041
3042 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3043 {
3044 return trace_array_vprintk(&global_trace, ip, fmt, args);
3045 }
3046 EXPORT_SYMBOL_GPL(trace_vprintk);
3047
3048 static void trace_iterator_increment(struct trace_iterator *iter)
3049 {
3050 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3051
3052 iter->idx++;
3053 if (buf_iter)
3054 ring_buffer_read(buf_iter, NULL);
3055 }
3056
3057 static struct trace_entry *
3058 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3059 unsigned long *lost_events)
3060 {
3061 struct ring_buffer_event *event;
3062 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3063
3064 if (buf_iter)
3065 event = ring_buffer_iter_peek(buf_iter, ts);
3066 else
3067 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3068 lost_events);
3069
3070 if (event) {
3071 iter->ent_size = ring_buffer_event_length(event);
3072 return ring_buffer_event_data(event);
3073 }
3074 iter->ent_size = 0;
3075 return NULL;
3076 }
3077
3078 static struct trace_entry *
3079 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3080 unsigned long *missing_events, u64 *ent_ts)
3081 {
3082 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3083 struct trace_entry *ent, *next = NULL;
3084 unsigned long lost_events = 0, next_lost = 0;
3085 int cpu_file = iter->cpu_file;
3086 u64 next_ts = 0, ts;
3087 int next_cpu = -1;
3088 int next_size = 0;
3089 int cpu;
3090
3091 /*
3092 * If we are in a per_cpu trace file, don't bother iterating over
3093 * all CPUs; just peek at that one directly.
3094 */
3095 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3096 if (ring_buffer_empty_cpu(buffer, cpu_file))
3097 return NULL;
3098 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3099 if (ent_cpu)
3100 *ent_cpu = cpu_file;
3101
3102 return ent;
3103 }
3104
3105 for_each_tracing_cpu(cpu) {
3106
3107 if (ring_buffer_empty_cpu(buffer, cpu))
3108 continue;
3109
3110 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3111
3112 /*
3113 * Pick the entry with the smallest timestamp:
3114 */
3115 if (ent && (!next || ts < next_ts)) {
3116 next = ent;
3117 next_cpu = cpu;
3118 next_ts = ts;
3119 next_lost = lost_events;
3120 next_size = iter->ent_size;
3121 }
3122 }
3123
3124 iter->ent_size = next_size;
3125
3126 if (ent_cpu)
3127 *ent_cpu = next_cpu;
3128
3129 if (ent_ts)
3130 *ent_ts = next_ts;
3131
3132 if (missing_events)
3133 *missing_events = next_lost;
3134
3135 return next;
3136 }
3137
3138 /* Find the next real entry, without updating the iterator itself */
3139 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3140 int *ent_cpu, u64 *ent_ts)
3141 {
3142 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3143 }
3144
3145 /* Find the next real entry, and increment the iterator to the next entry */
3146 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3147 {
3148 iter->ent = __find_next_entry(iter, &iter->cpu,
3149 &iter->lost_events, &iter->ts);
3150
3151 if (iter->ent)
3152 trace_iterator_increment(iter);
3153
3154 return iter->ent ? iter : NULL;
3155 }
3156
3157 static void trace_consume(struct trace_iterator *iter)
3158 {
3159 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3160 &iter->lost_events);
3161 }
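
/*
 * Usage sketch (illustrative): a consuming reader (the trace_pipe path later
 * in this file) alternates "find the oldest entry across all CPUs" with
 * "consume it".  Locking (trace_event_read_lock()/trace_access_lock()) and
 * flushing of iter->seq are omitted here.
 */
static void my_drain(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		print_trace_line(iter);	/* format iter->ent into iter->seq */
		trace_consume(iter);	/* discard the entry just printed */
	}
}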
3162
3163 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3164 {
3165 struct trace_iterator *iter = m->private;
3166 int i = (int)*pos;
3167 void *ent;
3168
3169 WARN_ON_ONCE(iter->leftover);
3170
3171 (*pos)++;
3172
3173 /* can't go backwards */
3174 if (iter->idx > i)
3175 return NULL;
3176
3177 if (iter->idx < 0)
3178 ent = trace_find_next_entry_inc(iter);
3179 else
3180 ent = iter;
3181
3182 while (ent && iter->idx < i)
3183 ent = trace_find_next_entry_inc(iter);
3184
3185 iter->pos = *pos;
3186
3187 return ent;
3188 }
3189
3190 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3191 {
3192 struct ring_buffer_event *event;
3193 struct ring_buffer_iter *buf_iter;
3194 unsigned long entries = 0;
3195 u64 ts;
3196
3197 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3198
3199 buf_iter = trace_buffer_iter(iter, cpu);
3200 if (!buf_iter)
3201 return;
3202
3203 ring_buffer_iter_reset(buf_iter);
3204
3205 /*
3206 * With the max latency tracers, a reset may never have taken
3207 * place on a CPU. This shows up as timestamps that are before
3208 * the start of the buffer.
3209 */
3210 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3211 if (ts >= iter->trace_buffer->time_start)
3212 break;
3213 entries++;
3214 ring_buffer_read(buf_iter, NULL);
3215 }
3216
3217 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3218 }
3219
3220 /*
3221 * The current tracer is copied to avoid global locking
3222 * all around.
3223 */
3224 static void *s_start(struct seq_file *m, loff_t *pos)
3225 {
3226 struct trace_iterator *iter = m->private;
3227 struct trace_array *tr = iter->tr;
3228 int cpu_file = iter->cpu_file;
3229 void *p = NULL;
3230 loff_t l = 0;
3231 int cpu;
3232
3233 /*
3234 * Copy the tracer to avoid using a global lock all around.
3235 * iter->trace is a copy of current_trace; the pointer to the
3236 * name may be compared instead of doing a strcmp(), as iter->trace->name
3237 * will point to the same string as current_trace->name.
3238 */
3239 mutex_lock(&trace_types_lock);
3240 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3241 *iter->trace = *tr->current_trace;
3242 mutex_unlock(&trace_types_lock);
3243
3244 #ifdef CONFIG_TRACER_MAX_TRACE
3245 if (iter->snapshot && iter->trace->use_max_tr)
3246 return ERR_PTR(-EBUSY);
3247 #endif
3248
3249 if (!iter->snapshot)
3250 atomic_inc(&trace_record_taskinfo_disabled);
3251
3252 if (*pos != iter->pos) {
3253 iter->ent = NULL;
3254 iter->cpu = 0;
3255 iter->idx = -1;
3256
3257 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3258 for_each_tracing_cpu(cpu)
3259 tracing_iter_reset(iter, cpu);
3260 } else
3261 tracing_iter_reset(iter, cpu_file);
3262
3263 iter->leftover = 0;
3264 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3265 ;
3266
3267 } else {
3268 /*
3269 * If we overflowed the seq_file before, then we want
3270 * to just reuse the trace_seq buffer again.
3271 */
3272 if (iter->leftover)
3273 p = iter;
3274 else {
3275 l = *pos - 1;
3276 p = s_next(m, p, &l);
3277 }
3278 }
3279
3280 trace_event_read_lock();
3281 trace_access_lock(cpu_file);
3282 return p;
3283 }
3284
3285 static void s_stop(struct seq_file *m, void *p)
3286 {
3287 struct trace_iterator *iter = m->private;
3288
3289 #ifdef CONFIG_TRACER_MAX_TRACE
3290 if (iter->snapshot && iter->trace->use_max_tr)
3291 return;
3292 #endif
3293
3294 if (!iter->snapshot)
3295 atomic_dec(&trace_record_taskinfo_disabled);
3296
3297 trace_access_unlock(iter->cpu_file);
3298 trace_event_read_unlock();
3299 }
3300
3301 static void
3302 get_total_entries(struct trace_buffer *buf,
3303 unsigned long *total, unsigned long *entries)
3304 {
3305 unsigned long count;
3306 int cpu;
3307
3308 *total = 0;
3309 *entries = 0;
3310
3311 for_each_tracing_cpu(cpu) {
3312 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3313 /*
3314 * If this buffer has skipped entries, then we hold all
3315 * entries for the trace and we need to ignore the
3316 * ones before the time stamp.
3317 */
3318 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3319 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3320 /* total is the same as the entries */
3321 *total += count;
3322 } else
3323 *total += count +
3324 ring_buffer_overrun_cpu(buf->buffer, cpu);
3325 *entries += count;
3326 }
3327 }
3328
3329 static void print_lat_help_header(struct seq_file *m)
3330 {
3331 seq_puts(m, "# _------=> CPU# \n"
3332 "# / _-----=> irqs-off \n"
3333 "# | / _----=> need-resched \n"
3334 "# || / _---=> hardirq/softirq \n"
3335 "# ||| / _--=> preempt-depth \n"
3336 "# |||| / delay \n"
3337 "# cmd pid ||||| time | caller \n"
3338 "# \\ / ||||| \\ | / \n");
3339 }
3340
3341 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3342 {
3343 unsigned long total;
3344 unsigned long entries;
3345
3346 get_total_entries(buf, &total, &entries);
3347 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3348 entries, total, num_online_cpus());
3349 seq_puts(m, "#\n");
3350 }
3351
3352 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3353 unsigned int flags)
3354 {
3355 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3356
3357 print_event_info(buf, m);
3358
3359 seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3360 seq_printf(m, "# | | | %s | |\n", tgid ? " | " : "");
3361 }
3362
3363 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3364 unsigned int flags)
3365 {
3366 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3367 const char tgid_space[] = " ";
3368 const char space[] = " ";
3369
3370 seq_printf(m, "# %s _-----=> irqs-off\n",
3371 tgid ? tgid_space : space);
3372 seq_printf(m, "# %s / _----=> need-resched\n",
3373 tgid ? tgid_space : space);
3374 seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
3375 tgid ? tgid_space : space);
3376 seq_printf(m, "# %s|| / _--=> preempt-depth\n",
3377 tgid ? tgid_space : space);
3378 seq_printf(m, "# %s||| / delay\n",
3379 tgid ? tgid_space : space);
3380 seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n",
3381 tgid ? " TGID " : space);
3382 seq_printf(m, "# | | | %s|||| | |\n",
3383 tgid ? " | " : space);
3384 }
3385
3386 void
3387 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3388 {
3389 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3390 struct trace_buffer *buf = iter->trace_buffer;
3391 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3392 struct tracer *type = iter->trace;
3393 unsigned long entries;
3394 unsigned long total;
3395 const char *name = "preemption";
3396
3397 name = type->name;
3398
3399 get_total_entries(buf, &total, &entries);
3400
3401 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3402 name, UTS_RELEASE);
3403 seq_puts(m, "# -----------------------------------"
3404 "---------------------------------\n");
3405 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3406 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3407 nsecs_to_usecs(data->saved_latency),
3408 entries,
3409 total,
3410 buf->cpu,
3411 #if defined(CONFIG_PREEMPT_NONE)
3412 "server",
3413 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3414 "desktop",
3415 #elif defined(CONFIG_PREEMPT)
3416 "preempt",
3417 #else
3418 "unknown",
3419 #endif
3420 /* These are reserved for later use */
3421 0, 0, 0, 0);
3422 #ifdef CONFIG_SMP
3423 seq_printf(m, " #P:%d)\n", num_online_cpus());
3424 #else
3425 seq_puts(m, ")\n");
3426 #endif
3427 seq_puts(m, "# -----------------\n");
3428 seq_printf(m, "# | task: %.16s-%d "
3429 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3430 data->comm, data->pid,
3431 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3432 data->policy, data->rt_priority);
3433 seq_puts(m, "# -----------------\n");
3434
3435 if (data->critical_start) {
3436 seq_puts(m, "# => started at: ");
3437 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3438 trace_print_seq(m, &iter->seq);
3439 seq_puts(m, "\n# => ended at: ");
3440 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3441 trace_print_seq(m, &iter->seq);
3442 seq_puts(m, "\n#\n");
3443 }
3444
3445 seq_puts(m, "#\n");
3446 }
3447
3448 static void test_cpu_buff_start(struct trace_iterator *iter)
3449 {
3450 struct trace_seq *s = &iter->seq;
3451 struct trace_array *tr = iter->tr;
3452
3453 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3454 return;
3455
3456 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3457 return;
3458
3459 if (cpumask_available(iter->started) &&
3460 cpumask_test_cpu(iter->cpu, iter->started))
3461 return;
3462
3463 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3464 return;
3465
3466 if (cpumask_available(iter->started))
3467 cpumask_set_cpu(iter->cpu, iter->started);
3468
3469 /* Don't print started cpu buffer for the first entry of the trace */
3470 if (iter->idx > 1)
3471 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3472 iter->cpu);
3473 }
3474
3475 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3476 {
3477 struct trace_array *tr = iter->tr;
3478 struct trace_seq *s = &iter->seq;
3479 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3480 struct trace_entry *entry;
3481 struct trace_event *event;
3482
3483 entry = iter->ent;
3484
3485 test_cpu_buff_start(iter);
3486
3487 event = ftrace_find_event(entry->type);
3488
3489 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3490 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3491 trace_print_lat_context(iter);
3492 else
3493 trace_print_context(iter);
3494 }
3495
3496 if (trace_seq_has_overflowed(s))
3497 return TRACE_TYPE_PARTIAL_LINE;
3498
3499 if (event)
3500 return event->funcs->trace(iter, sym_flags, event);
3501
3502 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3503
3504 return trace_handle_return(s);
3505 }
3506
3507 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3508 {
3509 struct trace_array *tr = iter->tr;
3510 struct trace_seq *s = &iter->seq;
3511 struct trace_entry *entry;
3512 struct trace_event *event;
3513
3514 entry = iter->ent;
3515
3516 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3517 trace_seq_printf(s, "%d %d %llu ",
3518 entry->pid, iter->cpu, iter->ts);
3519
3520 if (trace_seq_has_overflowed(s))
3521 return TRACE_TYPE_PARTIAL_LINE;
3522
3523 event = ftrace_find_event(entry->type);
3524 if (event)
3525 return event->funcs->raw(iter, 0, event);
3526
3527 trace_seq_printf(s, "%d ?\n", entry->type);
3528
3529 return trace_handle_return(s);
3530 }
3531
3532 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3533 {
3534 struct trace_array *tr = iter->tr;
3535 struct trace_seq *s = &iter->seq;
3536 unsigned char newline = '\n';
3537 struct trace_entry *entry;
3538 struct trace_event *event;
3539
3540 entry = iter->ent;
3541
3542 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3543 SEQ_PUT_HEX_FIELD(s, entry->pid);
3544 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3545 SEQ_PUT_HEX_FIELD(s, iter->ts);
3546 if (trace_seq_has_overflowed(s))
3547 return TRACE_TYPE_PARTIAL_LINE;
3548 }
3549
3550 event = ftrace_find_event(entry->type);
3551 if (event) {
3552 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3553 if (ret != TRACE_TYPE_HANDLED)
3554 return ret;
3555 }
3556
3557 SEQ_PUT_FIELD(s, newline);
3558
3559 return trace_handle_return(s);
3560 }
3561
3562 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3563 {
3564 struct trace_array *tr = iter->tr;
3565 struct trace_seq *s = &iter->seq;
3566 struct trace_entry *entry;
3567 struct trace_event *event;
3568
3569 entry = iter->ent;
3570
3571 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3572 SEQ_PUT_FIELD(s, entry->pid);
3573 SEQ_PUT_FIELD(s, iter->cpu);
3574 SEQ_PUT_FIELD(s, iter->ts);
3575 if (trace_seq_has_overflowed(s))
3576 return TRACE_TYPE_PARTIAL_LINE;
3577 }
3578
3579 event = ftrace_find_event(entry->type);
3580 return event ? event->funcs->binary(iter, 0, event) :
3581 TRACE_TYPE_HANDLED;
3582 }
3583
3584 int trace_empty(struct trace_iterator *iter)
3585 {
3586 struct ring_buffer_iter *buf_iter;
3587 int cpu;
3588
3589 /* If we are looking at one CPU buffer, only check that one */
3590 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3591 cpu = iter->cpu_file;
3592 buf_iter = trace_buffer_iter(iter, cpu);
3593 if (buf_iter) {
3594 if (!ring_buffer_iter_empty(buf_iter))
3595 return 0;
3596 } else {
3597 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3598 return 0;
3599 }
3600 return 1;
3601 }
3602
3603 for_each_tracing_cpu(cpu) {
3604 buf_iter = trace_buffer_iter(iter, cpu);
3605 if (buf_iter) {
3606 if (!ring_buffer_iter_empty(buf_iter))
3607 return 0;
3608 } else {
3609 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3610 return 0;
3611 }
3612 }
3613
3614 return 1;
3615 }
3616
3617 /* Called with trace_event_read_lock() held. */
3618 enum print_line_t print_trace_line(struct trace_iterator *iter)
3619 {
3620 struct trace_array *tr = iter->tr;
3621 unsigned long trace_flags = tr->trace_flags;
3622 enum print_line_t ret;
3623
3624 if (iter->lost_events) {
3625 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3626 iter->cpu, iter->lost_events);
3627 if (trace_seq_has_overflowed(&iter->seq))
3628 return TRACE_TYPE_PARTIAL_LINE;
3629 }
3630
3631 if (iter->trace && iter->trace->print_line) {
3632 ret = iter->trace->print_line(iter);
3633 if (ret != TRACE_TYPE_UNHANDLED)
3634 return ret;
3635 }
3636
3637 if (iter->ent->type == TRACE_BPUTS &&
3638 trace_flags & TRACE_ITER_PRINTK &&
3639 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3640 return trace_print_bputs_msg_only(iter);
3641
3642 if (iter->ent->type == TRACE_BPRINT &&
3643 trace_flags & TRACE_ITER_PRINTK &&
3644 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3645 return trace_print_bprintk_msg_only(iter);
3646
3647 if (iter->ent->type == TRACE_PRINT &&
3648 trace_flags & TRACE_ITER_PRINTK &&
3649 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3650 return trace_print_printk_msg_only(iter);
3651
3652 if (trace_flags & TRACE_ITER_BIN)
3653 return print_bin_fmt(iter);
3654
3655 if (trace_flags & TRACE_ITER_HEX)
3656 return print_hex_fmt(iter);
3657
3658 if (trace_flags & TRACE_ITER_RAW)
3659 return print_raw_fmt(iter);
3660
3661 return print_trace_fmt(iter);
3662 }
3663
3664 void trace_latency_header(struct seq_file *m)
3665 {
3666 struct trace_iterator *iter = m->private;
3667 struct trace_array *tr = iter->tr;
3668
3669 /* print nothing if the buffers are empty */
3670 if (trace_empty(iter))
3671 return;
3672
3673 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3674 print_trace_header(m, iter);
3675
3676 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3677 print_lat_help_header(m);
3678 }
3679
3680 void trace_default_header(struct seq_file *m)
3681 {
3682 struct trace_iterator *iter = m->private;
3683 struct trace_array *tr = iter->tr;
3684 unsigned long trace_flags = tr->trace_flags;
3685
3686 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3687 return;
3688
3689 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3690 /* print nothing if the buffers are empty */
3691 if (trace_empty(iter))
3692 return;
3693 print_trace_header(m, iter);
3694 if (!(trace_flags & TRACE_ITER_VERBOSE))
3695 print_lat_help_header(m);
3696 } else {
3697 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3698 if (trace_flags & TRACE_ITER_IRQ_INFO)
3699 print_func_help_header_irq(iter->trace_buffer,
3700 m, trace_flags);
3701 else
3702 print_func_help_header(iter->trace_buffer, m,
3703 trace_flags);
3704 }
3705 }
3706 }
3707
3708 static void test_ftrace_alive(struct seq_file *m)
3709 {
3710 if (!ftrace_is_dead())
3711 return;
3712 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3713 "# MAY BE MISSING FUNCTION EVENTS\n");
3714 }
3715
3716 #ifdef CONFIG_TRACER_MAX_TRACE
3717 static void show_snapshot_main_help(struct seq_file *m)
3718 {
3719 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3720 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3721 "# Takes a snapshot of the main buffer.\n"
3722 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3723 "# (Doesn't have to be '2' works with any number that\n"
3724 "# is not a '0' or '1')\n");
3725 }
3726
3727 static void show_snapshot_percpu_help(struct seq_file *m)
3728 {
3729 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3730 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3731 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3732 "# Takes a snapshot of the main buffer for this cpu.\n");
3733 #else
3734 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3735 "# Must use main snapshot file to allocate.\n");
3736 #endif
3737 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3738 "# (Doesn't have to be '2' works with any number that\n"
3739 "# is not a '0' or '1')\n");
3740 }
3741
3742 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3743 {
3744 if (iter->tr->allocated_snapshot)
3745 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3746 else
3747 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3748
3749 seq_puts(m, "# Snapshot commands:\n");
3750 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3751 show_snapshot_main_help(m);
3752 else
3753 show_snapshot_percpu_help(m);
3754 }
3755 #else
3756 /* Should never be called */
3757 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3758 #endif
3759
3760 static int s_show(struct seq_file *m, void *v)
3761 {
3762 struct trace_iterator *iter = v;
3763 int ret;
3764
3765 if (iter->ent == NULL) {
3766 if (iter->tr) {
3767 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3768 seq_puts(m, "#\n");
3769 test_ftrace_alive(m);
3770 }
3771 if (iter->snapshot && trace_empty(iter))
3772 print_snapshot_help(m, iter);
3773 else if (iter->trace && iter->trace->print_header)
3774 iter->trace->print_header(m);
3775 else
3776 trace_default_header(m);
3777
3778 } else if (iter->leftover) {
3779 /*
3780 * If we filled the seq_file buffer earlier, we
3781 * want to just show it now.
3782 */
3783 ret = trace_print_seq(m, &iter->seq);
3784
3785 /* ret should this time be zero, but you never know */
3786 iter->leftover = ret;
3787
3788 } else {
3789 print_trace_line(iter);
3790 ret = trace_print_seq(m, &iter->seq);
3791 /*
3792 * If we overflow the seq_file buffer, then it will
3793 * ask us for this data again at start up.
3794 * Use that instead.
3795 * ret is 0 if seq_file write succeeded.
3796 * -1 otherwise.
3797 */
3798 iter->leftover = ret;
3799 }
3800
3801 return 0;
3802 }
3803
3804 /*
3805 * Should be used after trace_array_get(); trace_types_lock
3806 * ensures that i_cdev was already initialized.
3807 */
3808 static inline int tracing_get_cpu(struct inode *inode)
3809 {
3810 if (inode->i_cdev) /* See trace_create_cpu_file() */
3811 return (long)inode->i_cdev - 1;
3812 return RING_BUFFER_ALL_CPUS;
3813 }
3814
3815 static const struct seq_operations tracer_seq_ops = {
3816 .start = s_start,
3817 .next = s_next,
3818 .stop = s_stop,
3819 .show = s_show,
3820 };
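
/*
 * Illustrative note: seq_read() on the "trace" file drives these operations
 * roughly as follows (simplified):
 *
 *	s_start(m, &pos)	locks and positions the iterator
 *	  s_show(m, v)		prints headers or one entry
 *	  s_next(m, v, &pos)	advances to the next entry
 *	  ...			show/next repeat until the buffer page fills
 *	s_stop(m, v)		unlocks
 */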
3821
3822 static struct trace_iterator *
3823 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3824 {
3825 struct trace_array *tr = inode->i_private;
3826 struct trace_iterator *iter;
3827 int cpu;
3828
3829 if (tracing_disabled)
3830 return ERR_PTR(-ENODEV);
3831
3832 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3833 if (!iter)
3834 return ERR_PTR(-ENOMEM);
3835
3836 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3837 GFP_KERNEL);
3838 if (!iter->buffer_iter)
3839 goto release;
3840
3841 /*
3842 * We make a copy of the current tracer to avoid concurrent
3843 * changes on it while we are reading.
3844 */
3845 mutex_lock(&trace_types_lock);
3846 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3847 if (!iter->trace)
3848 goto fail;
3849
3850 *iter->trace = *tr->current_trace;
3851
3852 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3853 goto fail;
3854
3855 iter->tr = tr;
3856
3857 #ifdef CONFIG_TRACER_MAX_TRACE
3858 /* Currently only the top directory has a snapshot */
3859 if (tr->current_trace->print_max || snapshot)
3860 iter->trace_buffer = &tr->max_buffer;
3861 else
3862 #endif
3863 iter->trace_buffer = &tr->trace_buffer;
3864 iter->snapshot = snapshot;
3865 iter->pos = -1;
3866 iter->cpu_file = tracing_get_cpu(inode);
3867 mutex_init(&iter->mutex);
3868
3869 /* Notify the tracer early; before we stop tracing. */
3870 if (iter->trace && iter->trace->open)
3871 iter->trace->open(iter);
3872
3873 /* Annotate start of buffers if we had overruns */
3874 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3875 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3876
3877 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3878 if (trace_clocks[tr->clock_id].in_ns)
3879 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3880
3881 /* stop the trace while dumping if we are not opening "snapshot" */
3882 if (!iter->snapshot)
3883 tracing_stop_tr(tr);
3884
3885 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3886 for_each_tracing_cpu(cpu) {
3887 iter->buffer_iter[cpu] =
3888 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3889 }
3890 ring_buffer_read_prepare_sync();
3891 for_each_tracing_cpu(cpu) {
3892 ring_buffer_read_start(iter->buffer_iter[cpu]);
3893 tracing_iter_reset(iter, cpu);
3894 }
3895 } else {
3896 cpu = iter->cpu_file;
3897 iter->buffer_iter[cpu] =
3898 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3899 ring_buffer_read_prepare_sync();
3900 ring_buffer_read_start(iter->buffer_iter[cpu]);
3901 tracing_iter_reset(iter, cpu);
3902 }
3903
3904 mutex_unlock(&trace_types_lock);
3905
3906 return iter;
3907
3908 fail:
3909 mutex_unlock(&trace_types_lock);
3910 kfree(iter->trace);
3911 kfree(iter->buffer_iter);
3912 release:
3913 seq_release_private(inode, file);
3914 return ERR_PTR(-ENOMEM);
3915 }
3916
3917 int tracing_open_generic(struct inode *inode, struct file *filp)
3918 {
3919 if (tracing_disabled)
3920 return -ENODEV;
3921
3922 filp->private_data = inode->i_private;
3923 return 0;
3924 }
3925
3926 bool tracing_is_disabled(void)
3927 {
3928 return (tracing_disabled) ? true : false;
3929 }
3930
3931 /*
3932 * Open and update trace_array ref count.
3933 * Must have the current trace_array passed to it.
3934 */
3935 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3936 {
3937 struct trace_array *tr = inode->i_private;
3938
3939 if (tracing_disabled)
3940 return -ENODEV;
3941
3942 if (trace_array_get(tr) < 0)
3943 return -ENODEV;
3944
3945 filp->private_data = inode->i_private;
3946
3947 return 0;
3948 }
3949
3950 static int tracing_release(struct inode *inode, struct file *file)
3951 {
3952 struct trace_array *tr = inode->i_private;
3953 struct seq_file *m = file->private_data;
3954 struct trace_iterator *iter;
3955 int cpu;
3956
3957 if (!(file->f_mode & FMODE_READ)) {
3958 trace_array_put(tr);
3959 return 0;
3960 }
3961
3962 /* Writes do not use seq_file */
3963 iter = m->private;
3964 mutex_lock(&trace_types_lock);
3965
3966 for_each_tracing_cpu(cpu) {
3967 if (iter->buffer_iter[cpu])
3968 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3969 }
3970
3971 if (iter->trace && iter->trace->close)
3972 iter->trace->close(iter);
3973
3974 if (!iter->snapshot)
3975 /* reenable tracing if it was previously enabled */
3976 tracing_start_tr(tr);
3977
3978 __trace_array_put(tr);
3979
3980 mutex_unlock(&trace_types_lock);
3981
3982 mutex_destroy(&iter->mutex);
3983 free_cpumask_var(iter->started);
3984 kfree(iter->trace);
3985 kfree(iter->buffer_iter);
3986 seq_release_private(inode, file);
3987
3988 return 0;
3989 }
3990
3991 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3992 {
3993 struct trace_array *tr = inode->i_private;
3994
3995 trace_array_put(tr);
3996 return 0;
3997 }
3998
3999 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4000 {
4001 struct trace_array *tr = inode->i_private;
4002
4003 trace_array_put(tr);
4004
4005 return single_release(inode, file);
4006 }
4007
4008 static int tracing_open(struct inode *inode, struct file *file)
4009 {
4010 struct trace_array *tr = inode->i_private;
4011 struct trace_iterator *iter;
4012 int ret = 0;
4013
4014 if (trace_array_get(tr) < 0)
4015 return -ENODEV;
4016
4017 /* If this file was open for write, then erase contents */
4018 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4019 int cpu = tracing_get_cpu(inode);
4020 struct trace_buffer *trace_buf = &tr->trace_buffer;
4021
4022 #ifdef CONFIG_TRACER_MAX_TRACE
4023 if (tr->current_trace->print_max)
4024 trace_buf = &tr->max_buffer;
4025 #endif
4026
4027 if (cpu == RING_BUFFER_ALL_CPUS)
4028 tracing_reset_online_cpus(trace_buf);
4029 else
4030 tracing_reset(trace_buf, cpu);
4031 }
4032
4033 if (file->f_mode & FMODE_READ) {
4034 iter = __tracing_open(inode, file, false);
4035 if (IS_ERR(iter))
4036 ret = PTR_ERR(iter);
4037 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4038 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4039 }
4040
4041 if (ret < 0)
4042 trace_array_put(tr);
4043
4044 return ret;
4045 }
4046
4047 /*
4048 * Some tracers are not suitable for instance buffers.
4049 * A tracer is always available for the global array (toplevel)
4050 * or if it explicitly states that it is.
4051 */
4052 static bool
4053 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4054 {
4055 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4056 }
4057
4058 /* Find the next tracer that this trace array may use */
4059 static struct tracer *
4060 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4061 {
4062 while (t && !trace_ok_for_array(t, tr))
4063 t = t->next;
4064
4065 return t;
4066 }
4067
4068 static void *
4069 t_next(struct seq_file *m, void *v, loff_t *pos)
4070 {
4071 struct trace_array *tr = m->private;
4072 struct tracer *t = v;
4073
4074 (*pos)++;
4075
4076 if (t)
4077 t = get_tracer_for_array(tr, t->next);
4078
4079 return t;
4080 }
4081
4082 static void *t_start(struct seq_file *m, loff_t *pos)
4083 {
4084 struct trace_array *tr = m->private;
4085 struct tracer *t;
4086 loff_t l = 0;
4087
4088 mutex_lock(&trace_types_lock);
4089
4090 t = get_tracer_for_array(tr, trace_types);
4091 for (; t && l < *pos; t = t_next(m, t, &l))
4092 ;
4093
4094 return t;
4095 }
4096
4097 static void t_stop(struct seq_file *m, void *p)
4098 {
4099 mutex_unlock(&trace_types_lock);
4100 }
4101
4102 static int t_show(struct seq_file *m, void *v)
4103 {
4104 struct tracer *t = v;
4105
4106 if (!t)
4107 return 0;
4108
4109 seq_puts(m, t->name);
4110 if (t->next)
4111 seq_putc(m, ' ');
4112 else
4113 seq_putc(m, '\n');
4114
4115 return 0;
4116 }
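/*
 * For illustration only (not output copied from a real system): reading
 * "available_tracers" walks the tracer list through the seq_ops below,
 * so the result is a single space-separated line ending in a newline,
 * e.g. something like:
 *
 *     blk function_graph function nop
 *
 * The exact set depends on which tracers are configured in.
 */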
4117
4118 static const struct seq_operations show_traces_seq_ops = {
4119 .start = t_start,
4120 .next = t_next,
4121 .stop = t_stop,
4122 .show = t_show,
4123 };
4124
4125 static int show_traces_open(struct inode *inode, struct file *file)
4126 {
4127 struct trace_array *tr = inode->i_private;
4128 struct seq_file *m;
4129 int ret;
4130
4131 if (tracing_disabled)
4132 return -ENODEV;
4133
4134 ret = seq_open(file, &show_traces_seq_ops);
4135 if (ret)
4136 return ret;
4137
4138 m = file->private_data;
4139 m->private = tr;
4140
4141 return 0;
4142 }
4143
4144 static ssize_t
4145 tracing_write_stub(struct file *filp, const char __user *ubuf,
4146 size_t count, loff_t *ppos)
4147 {
4148 return count;
4149 }
4150
4151 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4152 {
4153 int ret;
4154
4155 if (file->f_mode & FMODE_READ)
4156 ret = seq_lseek(file, offset, whence);
4157 else
4158 file->f_pos = ret = 0;
4159
4160 return ret;
4161 }
4162
4163 static const struct file_operations tracing_fops = {
4164 .open = tracing_open,
4165 .read = seq_read,
4166 .write = tracing_write_stub,
4167 .llseek = tracing_lseek,
4168 .release = tracing_release,
4169 };
4170
4171 static const struct file_operations show_traces_fops = {
4172 .open = show_traces_open,
4173 .read = seq_read,
4174 .release = seq_release,
4175 .llseek = seq_lseek,
4176 };
4177
4178 /*
4179 * The tracer itself will not take this lock, but still we want
4180 * to provide a consistent cpumask to user-space:
4181 */
4182 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4183
4184 /*
4185 * Temporary storage for the character representation of the
4186 * CPU bitmask (and one more byte for the newline):
4187 */
4188 static char mask_str[NR_CPUS + 1];
4189
4190 static ssize_t
4191 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4192 size_t count, loff_t *ppos)
4193 {
4194 struct trace_array *tr = file_inode(filp)->i_private;
4195 int len;
4196
4197 mutex_lock(&tracing_cpumask_update_lock);
4198
4199 len = snprintf(mask_str, count, "%*pb\n",
4200 cpumask_pr_args(tr->tracing_cpumask));
4201 if (len >= count) {
4202 count = -EINVAL;
4203 goto out_err;
4204 }
4205 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4206
4207 out_err:
4208 mutex_unlock(&tracing_cpumask_update_lock);
4209
4210 return count;
4211 }
4212
4213 static ssize_t
4214 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4215 size_t count, loff_t *ppos)
4216 {
4217 struct trace_array *tr = file_inode(filp)->i_private;
4218 cpumask_var_t tracing_cpumask_new;
4219 int err, cpu;
4220
4221 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4222 return -ENOMEM;
4223
4224 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4225 if (err)
4226 goto err_unlock;
4227
4228 mutex_lock(&tracing_cpumask_update_lock);
4229
4230 local_irq_disable();
4231 arch_spin_lock(&tr->max_lock);
4232 for_each_tracing_cpu(cpu) {
4233 /*
4234 * Increase/decrease the disabled counter if we are
4235 * about to flip a bit in the cpumask:
4236 */
4237 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4238 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4239 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4240 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4241 }
4242 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4243 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4244 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4245 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4246 }
4247 }
4248 arch_spin_unlock(&tr->max_lock);
4249 local_irq_enable();
4250
4251 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4252
4253 mutex_unlock(&tracing_cpumask_update_lock);
4254 free_cpumask_var(tracing_cpumask_new);
4255
4256 return count;
4257
4258 err_unlock:
4259 free_cpumask_var(tracing_cpumask_new);
4260
4261 return err;
4262 }
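/*
 * Illustrative usage (an assumed example, not part of this file): the
 * mask is read and written as a hex CPU bitmap, e.g.
 *
 *     # cat tracing_cpumask
 *     f
 *     # echo 3 > tracing_cpumask       (restrict tracing to CPUs 0-1)
 *
 * cpumask_parse_user() above does the parsing; flipping a bit also
 * disables or re-enables recording on that CPU's ring buffer.
 */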
4263
4264 static const struct file_operations tracing_cpumask_fops = {
4265 .open = tracing_open_generic_tr,
4266 .read = tracing_cpumask_read,
4267 .write = tracing_cpumask_write,
4268 .release = tracing_release_generic_tr,
4269 .llseek = generic_file_llseek,
4270 };
4271
4272 static int tracing_trace_options_show(struct seq_file *m, void *v)
4273 {
4274 struct tracer_opt *trace_opts;
4275 struct trace_array *tr = m->private;
4276 u32 tracer_flags;
4277 int i;
4278
4279 mutex_lock(&trace_types_lock);
4280 tracer_flags = tr->current_trace->flags->val;
4281 trace_opts = tr->current_trace->flags->opts;
4282
4283 for (i = 0; trace_options[i]; i++) {
4284 if (tr->trace_flags & (1 << i))
4285 seq_printf(m, "%s\n", trace_options[i]);
4286 else
4287 seq_printf(m, "no%s\n", trace_options[i]);
4288 }
4289
4290 for (i = 0; trace_opts[i].name; i++) {
4291 if (tracer_flags & trace_opts[i].bit)
4292 seq_printf(m, "%s\n", trace_opts[i].name);
4293 else
4294 seq_printf(m, "no%s\n", trace_opts[i].name);
4295 }
4296 mutex_unlock(&trace_types_lock);
4297
4298 return 0;
4299 }
4300
4301 static int __set_tracer_option(struct trace_array *tr,
4302 struct tracer_flags *tracer_flags,
4303 struct tracer_opt *opts, int neg)
4304 {
4305 struct tracer *trace = tracer_flags->trace;
4306 int ret;
4307
4308 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4309 if (ret)
4310 return ret;
4311
4312 if (neg)
4313 tracer_flags->val &= ~opts->bit;
4314 else
4315 tracer_flags->val |= opts->bit;
4316 return 0;
4317 }
4318
4319 /* Try to assign a tracer specific option */
4320 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4321 {
4322 struct tracer *trace = tr->current_trace;
4323 struct tracer_flags *tracer_flags = trace->flags;
4324 struct tracer_opt *opts = NULL;
4325 int i;
4326
4327 for (i = 0; tracer_flags->opts[i].name; i++) {
4328 opts = &tracer_flags->opts[i];
4329
4330 if (strcmp(cmp, opts->name) == 0)
4331 return __set_tracer_option(tr, trace->flags, opts, neg);
4332 }
4333
4334 return -EINVAL;
4335 }
4336
4337 /* Some tracers require overwrite to stay enabled */
4338 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4339 {
4340 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4341 return -1;
4342
4343 return 0;
4344 }
4345
4346 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4347 {
4348 /* do nothing if flag is already set */
4349 if (!!(tr->trace_flags & mask) == !!enabled)
4350 return 0;
4351
4352 /* Give the tracer a chance to approve the change */
4353 if (tr->current_trace->flag_changed)
4354 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4355 return -EINVAL;
4356
4357 if (enabled)
4358 tr->trace_flags |= mask;
4359 else
4360 tr->trace_flags &= ~mask;
4361
4362 if (mask == TRACE_ITER_RECORD_CMD)
4363 trace_event_enable_cmd_record(enabled);
4364
4365 if (mask == TRACE_ITER_RECORD_TGID) {
4366 if (!tgid_map)
4367 tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4368 GFP_KERNEL);
4369 if (!tgid_map) {
4370 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4371 return -ENOMEM;
4372 }
4373
4374 trace_event_enable_tgid_record(enabled);
4375 }
4376
4377 if (mask == TRACE_ITER_EVENT_FORK)
4378 trace_event_follow_fork(tr, enabled);
4379
4380 if (mask == TRACE_ITER_FUNC_FORK)
4381 ftrace_pid_follow_fork(tr, enabled);
4382
4383 if (mask == TRACE_ITER_OVERWRITE) {
4384 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4385 #ifdef CONFIG_TRACER_MAX_TRACE
4386 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4387 #endif
4388 }
4389
4390 if (mask == TRACE_ITER_PRINTK) {
4391 trace_printk_start_stop_comm(enabled);
4392 trace_printk_control(enabled);
4393 }
4394
4395 return 0;
4396 }
4397
4398 static int trace_set_options(struct trace_array *tr, char *option)
4399 {
4400 char *cmp;
4401 int neg = 0;
4402 int ret = -ENODEV;
4403 int i;
4404 size_t orig_len = strlen(option);
4405
4406 cmp = strstrip(option);
4407
4408 if (strncmp(cmp, "no", 2) == 0) {
4409 neg = 1;
4410 cmp += 2;
4411 }
4412
4413 mutex_lock(&trace_types_lock);
4414
4415 for (i = 0; trace_options[i]; i++) {
4416 if (strcmp(cmp, trace_options[i]) == 0) {
4417 ret = set_tracer_flag(tr, 1 << i, !neg);
4418 break;
4419 }
4420 }
4421
4422 /* If no option could be set, test the specific tracer options */
4423 if (!trace_options[i])
4424 ret = set_tracer_option(tr, cmp, neg);
4425
4426 mutex_unlock(&trace_types_lock);
4427
4428 /*
4429 * If the first trailing whitespace is replaced with '\0' by strstrip,
4430 * turn it back into a space.
4431 */
4432 if (orig_len > strlen(option))
4433 option[strlen(option)] = ' ';
4434
4435 return ret;
4436 }
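/*
 * Illustrative usage (assumed option names): core flags and
 * tracer-specific flags are both set through the trace_options file,
 * with a "no" prefix clearing them, e.g.
 *
 *     # echo sym-offset > trace_options
 *     # echo nosym-offset > trace_options
 *
 * A name that matches no core flag falls through to
 * set_tracer_option() for the current tracer, as in the code above.
 */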
4437
4438 static void __init apply_trace_boot_options(void)
4439 {
4440 char *buf = trace_boot_options_buf;
4441 char *option;
4442
4443 while (true) {
4444 option = strsep(&buf, ",");
4445
4446 if (!option)
4447 break;
4448
4449 if (*option)
4450 trace_set_options(&global_trace, option);
4451
4452 /* Put back the comma to allow this to be called again */
4453 if (buf)
4454 *(buf - 1) = ',';
4455 }
4456 }
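/*
 * For illustration: trace_boot_options_buf is filled from the kernel
 * command line (the "trace_options=" boot parameter, handled elsewhere
 * in this file), so a boot line such as
 *
 *     trace_options=sym-offset,noirq-info
 *
 * (an assumed example) is split on ',' above and each piece is fed to
 * trace_set_options(). The comma is restored so the buffer can be
 * parsed again if needed.
 */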
4457
4458 static ssize_t
4459 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4460 size_t cnt, loff_t *ppos)
4461 {
4462 struct seq_file *m = filp->private_data;
4463 struct trace_array *tr = m->private;
4464 char buf[64];
4465 int ret;
4466
4467 if (cnt >= sizeof(buf))
4468 return -EINVAL;
4469
4470 if (copy_from_user(buf, ubuf, cnt))
4471 return -EFAULT;
4472
4473 buf[cnt] = 0;
4474
4475 ret = trace_set_options(tr, buf);
4476 if (ret < 0)
4477 return ret;
4478
4479 *ppos += cnt;
4480
4481 return cnt;
4482 }
4483
4484 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4485 {
4486 struct trace_array *tr = inode->i_private;
4487 int ret;
4488
4489 if (tracing_disabled)
4490 return -ENODEV;
4491
4492 if (trace_array_get(tr) < 0)
4493 return -ENODEV;
4494
4495 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4496 if (ret < 0)
4497 trace_array_put(tr);
4498
4499 return ret;
4500 }
4501
4502 static const struct file_operations tracing_iter_fops = {
4503 .open = tracing_trace_options_open,
4504 .read = seq_read,
4505 .llseek = seq_lseek,
4506 .release = tracing_single_release_tr,
4507 .write = tracing_trace_options_write,
4508 };
4509
4510 static const char readme_msg[] =
4511 "tracing mini-HOWTO:\n\n"
4512 "# echo 0 > tracing_on : quick way to disable tracing\n"
4513 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4514 " Important files:\n"
4515 " trace\t\t\t- The static contents of the buffer\n"
4516 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4517 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4518 " current_tracer\t- function and latency tracers\n"
4519 " available_tracers\t- list of configured tracers for current_tracer\n"
4520 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4521 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4522 " trace_clock\t\t-change the clock used to order events\n"
4523 " local: Per cpu clock but may not be synced across CPUs\n"
4524 " global: Synced across CPUs but slows tracing down.\n"
4525 " counter: Not a clock, but just an increment\n"
4526 " uptime: Jiffy counter from time of boot\n"
4527 " perf: Same clock that perf events use\n"
4528 #ifdef CONFIG_X86_64
4529 " x86-tsc: TSC cycle counter\n"
4530 #endif
4531 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4532 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4533 " tracing_cpumask\t- Limit which CPUs to trace\n"
4534 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4535 "\t\t\t Remove sub-buffer with rmdir\n"
4536 " trace_options\t\t- Set format or modify how tracing happens\n"
4537 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4538 "\t\t\t option name\n"
4539 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4540 #ifdef CONFIG_DYNAMIC_FTRACE
4541 "\n available_filter_functions - list of functions that can be filtered on\n"
4542 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4543 "\t\t\t functions\n"
4544 "\t accepts: func_full_name or glob-matching-pattern\n"
4545 "\t modules: Can select a group via module\n"
4546 "\t Format: :mod:<module-name>\n"
4547 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4548 "\t triggers: a command to perform when function is hit\n"
4549 "\t Format: <function>:<trigger>[:count]\n"
4550 "\t trigger: traceon, traceoff\n"
4551 "\t\t enable_event:<system>:<event>\n"
4552 "\t\t disable_event:<system>:<event>\n"
4553 #ifdef CONFIG_STACKTRACE
4554 "\t\t stacktrace\n"
4555 #endif
4556 #ifdef CONFIG_TRACER_SNAPSHOT
4557 "\t\t snapshot\n"
4558 #endif
4559 "\t\t dump\n"
4560 "\t\t cpudump\n"
4561 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4562 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4563 "\t The first one will disable tracing every time do_fault is hit\n"
4564 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4565 "\t The first time do trap is hit and it disables tracing, the\n"
4566 "\t counter will decrement to 2. If tracing is already disabled,\n"
4567 "\t the counter will not decrement. It only decrements when the\n"
4568 "\t trigger did work\n"
4569 "\t To remove trigger without count:\n"
4570 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4571 "\t To remove trigger with a count:\n"
4572 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4573 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4574 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4575 "\t modules: Can select a group via module command :mod:\n"
4576 "\t Does not accept triggers\n"
4577 #endif /* CONFIG_DYNAMIC_FTRACE */
4578 #ifdef CONFIG_FUNCTION_TRACER
4579 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4580 "\t\t (function)\n"
4581 #endif
4582 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4583 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4584 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4585 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4586 #endif
4587 #ifdef CONFIG_TRACER_SNAPSHOT
4588 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4589 "\t\t\t snapshot buffer. Read the contents for more\n"
4590 "\t\t\t information\n"
4591 #endif
4592 #ifdef CONFIG_STACK_TRACER
4593 " stack_trace\t\t- Shows the max stack trace when active\n"
4594 " stack_max_size\t- Shows current max stack size that was traced\n"
4595 "\t\t\t Write into this file to reset the max size (trigger a\n"
4596 "\t\t\t new trace)\n"
4597 #ifdef CONFIG_DYNAMIC_FTRACE
4598 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4599 "\t\t\t traces\n"
4600 #endif
4601 #endif /* CONFIG_STACK_TRACER */
4602 #ifdef CONFIG_KPROBE_EVENTS
4603 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4604 "\t\t\t Write into this file to define/undefine new trace events.\n"
4605 #endif
4606 #ifdef CONFIG_UPROBE_EVENTS
4607 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4608 "\t\t\t Write into this file to define/undefine new trace events.\n"
4609 #endif
4610 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4611 "\t accepts: event-definitions (one definition per line)\n"
4612 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
4613 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4614 "\t -:[<group>/]<event>\n"
4615 #ifdef CONFIG_KPROBE_EVENTS
4616 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4617 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4618 #endif
4619 #ifdef CONFIG_UPROBE_EVENTS
4620 "\t place: <path>:<offset>\n"
4621 #endif
4622 "\t args: <name>=fetcharg[:type]\n"
4623 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4624 "\t $stack<index>, $stack, $retval, $comm\n"
4625 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4626 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4627 #endif
4628 " events/\t\t- Directory containing all trace event subsystems:\n"
4629 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4630 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4631 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4632 "\t\t\t events\n"
4633 " filter\t\t- If set, only events passing filter are traced\n"
4634 " events/<system>/<event>/\t- Directory containing control files for\n"
4635 "\t\t\t <event>:\n"
4636 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4637 " filter\t\t- If set, only events passing filter are traced\n"
4638 " trigger\t\t- If set, a command to perform when event is hit\n"
4639 "\t Format: <trigger>[:count][if <filter>]\n"
4640 "\t trigger: traceon, traceoff\n"
4641 "\t enable_event:<system>:<event>\n"
4642 "\t disable_event:<system>:<event>\n"
4643 #ifdef CONFIG_HIST_TRIGGERS
4644 "\t enable_hist:<system>:<event>\n"
4645 "\t disable_hist:<system>:<event>\n"
4646 #endif
4647 #ifdef CONFIG_STACKTRACE
4648 "\t\t stacktrace\n"
4649 #endif
4650 #ifdef CONFIG_TRACER_SNAPSHOT
4651 "\t\t snapshot\n"
4652 #endif
4653 #ifdef CONFIG_HIST_TRIGGERS
4654 "\t\t hist (see below)\n"
4655 #endif
4656 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4657 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4658 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4659 "\t events/block/block_unplug/trigger\n"
4660 "\t The first disables tracing every time block_unplug is hit.\n"
4661 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4662 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4663 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4664 "\t Like function triggers, the counter is only decremented if it\n"
4665 "\t enabled or disabled tracing.\n"
4666 "\t To remove a trigger without a count:\n"
4667 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4668 "\t To remove a trigger with a count:\n"
4669 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4670 "\t Filters can be ignored when removing a trigger.\n"
4671 #ifdef CONFIG_HIST_TRIGGERS
4672 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4673 "\t Format: hist:keys=<field1[,field2,...]>\n"
4674 "\t [:values=<field1[,field2,...]>]\n"
4675 "\t [:sort=<field1[,field2,...]>]\n"
4676 "\t [:size=#entries]\n"
4677 "\t [:pause][:continue][:clear]\n"
4678 "\t [:name=histname1]\n"
4679 "\t [if <filter>]\n\n"
4680 "\t When a matching event is hit, an entry is added to a hash\n"
4681 "\t table using the key(s) and value(s) named, and the value of a\n"
4682 "\t sum called 'hitcount' is incremented. Keys and values\n"
4683 "\t correspond to fields in the event's format description. Keys\n"
4684 "\t can be any field, or the special string 'stacktrace'.\n"
4685 "\t Compound keys consisting of up to two fields can be specified\n"
4686 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4687 "\t fields. Sort keys consisting of up to two fields can be\n"
4688 "\t specified using the 'sort' keyword. The sort direction can\n"
4689 "\t be modified by appending '.descending' or '.ascending' to a\n"
4690 "\t sort field. The 'size' parameter can be used to specify more\n"
4691 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4692 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4693 "\t its histogram data will be shared with other triggers of the\n"
4694 "\t same name, and trigger hits will update this common data.\n\n"
4695 "\t Reading the 'hist' file for the event will dump the hash\n"
4696 "\t table in its entirety to stdout. If there are multiple hist\n"
4697 "\t triggers attached to an event, there will be a table for each\n"
4698 "\t trigger in the output. The table displayed for a named\n"
4699 "\t trigger will be the same as any other instance having the\n"
4700 "\t same name. The default format used to display a given field\n"
4701 "\t can be modified by appending any of the following modifiers\n"
4702 "\t to the field name, as applicable:\n\n"
4703 "\t .hex display a number as a hex value\n"
4704 "\t .sym display an address as a symbol\n"
4705 "\t .sym-offset display an address as a symbol and offset\n"
4706 "\t .execname display a common_pid as a program name\n"
4707 "\t .syscall display a syscall id as a syscall name\n\n"
4708 "\t .log2 display log2 value rather than raw number\n\n"
4709 "\t The 'pause' parameter can be used to pause an existing hist\n"
4710 "\t trigger or to start a hist trigger but not log any events\n"
4711 "\t until told to do so. 'continue' can be used to start or\n"
4712 "\t restart a paused hist trigger.\n\n"
4713 "\t The 'clear' parameter will clear the contents of a running\n"
4714 "\t hist trigger and leave its current paused/active state\n"
4715 "\t unchanged.\n\n"
4716 "\t The enable_hist and disable_hist triggers can be used to\n"
4717 "\t have one event conditionally start and stop another event's\n"
4718 "\t already-attached hist trigger. The syntax is analagous to\n"
4719 "\t the enable_event and disable_event triggers.\n"
4720 #endif
4721 ;
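/*
 * A minimal worked session using only the files documented in the
 * readme above (a sketch; paths assume tracefs is mounted at
 * /sys/kernel/debug/tracing and the function tracer is configured in):
 *
 *     # cd /sys/kernel/debug/tracing
 *     # echo function > current_tracer
 *     # echo 1 > tracing_on
 *     # cat trace_pipe > /tmp/out &        (consuming read)
 *     # echo 0 > tracing_on
 *     # echo > trace                       (clear the buffer)
 */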
4722
4723 static ssize_t
4724 tracing_readme_read(struct file *filp, char __user *ubuf,
4725 size_t cnt, loff_t *ppos)
4726 {
4727 return simple_read_from_buffer(ubuf, cnt, ppos,
4728 readme_msg, strlen(readme_msg));
4729 }
4730
4731 static const struct file_operations tracing_readme_fops = {
4732 .open = tracing_open_generic,
4733 .read = tracing_readme_read,
4734 .llseek = generic_file_llseek,
4735 };
4736
4737 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4738 {
4739 int *ptr = v;
4740
4741 if (*pos || m->count)
4742 ptr++;
4743
4744 (*pos)++;
4745
4746 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4747 if (trace_find_tgid(*ptr))
4748 return ptr;
4749 }
4750
4751 return NULL;
4752 }
4753
4754 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4755 {
4756 void *v;
4757 loff_t l = 0;
4758
4759 if (!tgid_map)
4760 return NULL;
4761
4762 v = &tgid_map[0];
4763 while (l <= *pos) {
4764 v = saved_tgids_next(m, v, &l);
4765 if (!v)
4766 return NULL;
4767 }
4768
4769 return v;
4770 }
4771
4772 static void saved_tgids_stop(struct seq_file *m, void *v)
4773 {
4774 }
4775
4776 static int saved_tgids_show(struct seq_file *m, void *v)
4777 {
4778 int pid = (int *)v - tgid_map;
4779
4780 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4781 return 0;
4782 }
4783
4784 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4785 .start = saved_tgids_start,
4786 .stop = saved_tgids_stop,
4787 .next = saved_tgids_next,
4788 .show = saved_tgids_show,
4789 };
4790
4791 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4792 {
4793 if (tracing_disabled)
4794 return -ENODEV;
4795
4796 return seq_open(filp, &tracing_saved_tgids_seq_ops);
4797 }
4798
4799
4800 static const struct file_operations tracing_saved_tgids_fops = {
4801 .open = tracing_saved_tgids_open,
4802 .read = seq_read,
4803 .llseek = seq_lseek,
4804 .release = seq_release,
4805 };
4806
4807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4808 {
4809 unsigned int *ptr = v;
4810
4811 if (*pos || m->count)
4812 ptr++;
4813
4814 (*pos)++;
4815
4816 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4817 ptr++) {
4818 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4819 continue;
4820
4821 return ptr;
4822 }
4823
4824 return NULL;
4825 }
4826
4827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4828 {
4829 void *v;
4830 loff_t l = 0;
4831
4832 preempt_disable();
4833 arch_spin_lock(&trace_cmdline_lock);
4834
4835 v = &savedcmd->map_cmdline_to_pid[0];
4836 while (l <= *pos) {
4837 v = saved_cmdlines_next(m, v, &l);
4838 if (!v)
4839 return NULL;
4840 }
4841
4842 return v;
4843 }
4844
4845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4846 {
4847 arch_spin_unlock(&trace_cmdline_lock);
4848 preempt_enable();
4849 }
4850
4851 static int saved_cmdlines_show(struct seq_file *m, void *v)
4852 {
4853 char buf[TASK_COMM_LEN];
4854 unsigned int *pid = v;
4855
4856 __trace_find_cmdline(*pid, buf);
4857 seq_printf(m, "%d %s\n", *pid, buf);
4858 return 0;
4859 }
4860
4861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4862 .start = saved_cmdlines_start,
4863 .next = saved_cmdlines_next,
4864 .stop = saved_cmdlines_stop,
4865 .show = saved_cmdlines_show,
4866 };
4867
4868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4869 {
4870 if (tracing_disabled)
4871 return -ENODEV;
4872
4873 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4874 }
4875
4876 static const struct file_operations tracing_saved_cmdlines_fops = {
4877 .open = tracing_saved_cmdlines_open,
4878 .read = seq_read,
4879 .llseek = seq_lseek,
4880 .release = seq_release,
4881 };
4882
4883 static ssize_t
4884 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4885 size_t cnt, loff_t *ppos)
4886 {
4887 char buf[64];
4888 int r;
4889
4890 arch_spin_lock(&trace_cmdline_lock);
4891 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4892 arch_spin_unlock(&trace_cmdline_lock);
4893
4894 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4895 }
4896
4897 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4898 {
4899 kfree(s->saved_cmdlines);
4900 kfree(s->map_cmdline_to_pid);
4901 kfree(s);
4902 }
4903
4904 static int tracing_resize_saved_cmdlines(unsigned int val)
4905 {
4906 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4907
4908 s = kmalloc(sizeof(*s), GFP_KERNEL);
4909 if (!s)
4910 return -ENOMEM;
4911
4912 if (allocate_cmdlines_buffer(val, s) < 0) {
4913 kfree(s);
4914 return -ENOMEM;
4915 }
4916
4917 arch_spin_lock(&trace_cmdline_lock);
4918 savedcmd_temp = savedcmd;
4919 savedcmd = s;
4920 arch_spin_unlock(&trace_cmdline_lock);
4921 free_saved_cmdlines_buffer(savedcmd_temp);
4922
4923 return 0;
4924 }
4925
4926 static ssize_t
4927 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4928 size_t cnt, loff_t *ppos)
4929 {
4930 unsigned long val;
4931 int ret;
4932
4933 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4934 if (ret)
4935 return ret;
4936
4937 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4938 if (!val || val > PID_MAX_DEFAULT)
4939 return -EINVAL;
4940
4941 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4942 if (ret < 0)
4943 return ret;
4944
4945 *ppos += cnt;
4946
4947 return cnt;
4948 }
4949
4950 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4951 .open = tracing_open_generic,
4952 .read = tracing_saved_cmdlines_size_read,
4953 .write = tracing_saved_cmdlines_size_write,
4954 };
4955
4956 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4957 static union trace_eval_map_item *
4958 update_eval_map(union trace_eval_map_item *ptr)
4959 {
4960 if (!ptr->map.eval_string) {
4961 if (ptr->tail.next) {
4962 ptr = ptr->tail.next;
4963 /* Set ptr to the next real item (skip head) */
4964 ptr++;
4965 } else
4966 return NULL;
4967 }
4968 return ptr;
4969 }
4970
4971 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4972 {
4973 union trace_eval_map_item *ptr = v;
4974
4975 /*
4976 * Paranoid! If ptr points to end, we don't want to increment past it.
4977 * This really should never happen.
4978 */
4979 ptr = update_eval_map(ptr);
4980 if (WARN_ON_ONCE(!ptr))
4981 return NULL;
4982
4983 ptr++;
4984
4985 (*pos)++;
4986
4987 ptr = update_eval_map(ptr);
4988
4989 return ptr;
4990 }
4991
4992 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4993 {
4994 union trace_eval_map_item *v;
4995 loff_t l = 0;
4996
4997 mutex_lock(&trace_eval_mutex);
4998
4999 v = trace_eval_maps;
5000 if (v)
5001 v++;
5002
5003 while (v && l < *pos) {
5004 v = eval_map_next(m, v, &l);
5005 }
5006
5007 return v;
5008 }
5009
5010 static void eval_map_stop(struct seq_file *m, void *v)
5011 {
5012 mutex_unlock(&trace_eval_mutex);
5013 }
5014
5015 static int eval_map_show(struct seq_file *m, void *v)
5016 {
5017 union trace_eval_map_item *ptr = v;
5018
5019 seq_printf(m, "%s %ld (%s)\n",
5020 ptr->map.eval_string, ptr->map.eval_value,
5021 ptr->map.system);
5022
5023 return 0;
5024 }
5025
5026 static const struct seq_operations tracing_eval_map_seq_ops = {
5027 .start = eval_map_start,
5028 .next = eval_map_next,
5029 .stop = eval_map_stop,
5030 .show = eval_map_show,
5031 };
5032
5033 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5034 {
5035 if (tracing_disabled)
5036 return -ENODEV;
5037
5038 return seq_open(filp, &tracing_eval_map_seq_ops);
5039 }
5040
5041 static const struct file_operations tracing_eval_map_fops = {
5042 .open = tracing_eval_map_open,
5043 .read = seq_read,
5044 .llseek = seq_lseek,
5045 .release = seq_release,
5046 };
5047
5048 static inline union trace_eval_map_item *
5049 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5050 {
5051 /* Return tail of array given the head */
5052 return ptr + ptr->head.length + 1;
5053 }
5054
5055 static void
5056 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5057 int len)
5058 {
5059 struct trace_eval_map **stop;
5060 struct trace_eval_map **map;
5061 union trace_eval_map_item *map_array;
5062 union trace_eval_map_item *ptr;
5063
5064 stop = start + len;
5065
5066 /*
5067 * The trace_eval_maps contains the map plus a head and tail item,
5068 * where the head holds the module and length of array, and the
5069 * tail holds a pointer to the next list.
5070 */
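/*
 * Illustration of the resulting layout (len + 2 items):
 *
 *   map_array[0]        head  (mod, length = len)
 *   map_array[1..len]   one trace_eval_map copied per entry
 *   map_array[len + 1]  tail  (zeroed below; tail.next chains to the
 *                              next module's array, if any)
 */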
5071 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5072 if (!map_array) {
5073 pr_warn("Unable to allocate trace eval mapping\n");
5074 return;
5075 }
5076
5077 mutex_lock(&trace_eval_mutex);
5078
5079 if (!trace_eval_maps)
5080 trace_eval_maps = map_array;
5081 else {
5082 ptr = trace_eval_maps;
5083 for (;;) {
5084 ptr = trace_eval_jmp_to_tail(ptr);
5085 if (!ptr->tail.next)
5086 break;
5087 ptr = ptr->tail.next;
5088
5089 }
5090 ptr->tail.next = map_array;
5091 }
5092 map_array->head.mod = mod;
5093 map_array->head.length = len;
5094 map_array++;
5095
5096 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5097 map_array->map = **map;
5098 map_array++;
5099 }
5100 memset(map_array, 0, sizeof(*map_array));
5101
5102 mutex_unlock(&trace_eval_mutex);
5103 }
5104
5105 static void trace_create_eval_file(struct dentry *d_tracer)
5106 {
5107 trace_create_file("eval_map", 0444, d_tracer,
5108 NULL, &tracing_eval_map_fops);
5109 }
5110
5111 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5112 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5113 static inline void trace_insert_eval_map_file(struct module *mod,
5114 struct trace_eval_map **start, int len) { }
5115 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5116
5117 static void trace_insert_eval_map(struct module *mod,
5118 struct trace_eval_map **start, int len)
5119 {
5120 struct trace_eval_map **map;
5121
5122 if (len <= 0)
5123 return;
5124
5125 map = start;
5126
5127 trace_event_eval_update(map, len);
5128
5129 trace_insert_eval_map_file(mod, start, len);
5130 }
5131
5132 static ssize_t
5133 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5134 size_t cnt, loff_t *ppos)
5135 {
5136 struct trace_array *tr = filp->private_data;
5137 char buf[MAX_TRACER_SIZE+2];
5138 int r;
5139
5140 mutex_lock(&trace_types_lock);
5141 r = sprintf(buf, "%s\n", tr->current_trace->name);
5142 mutex_unlock(&trace_types_lock);
5143
5144 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5145 }
5146
5147 int tracer_init(struct tracer *t, struct trace_array *tr)
5148 {
5149 tracing_reset_online_cpus(&tr->trace_buffer);
5150 return t->init(tr);
5151 }
5152
5153 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5154 {
5155 int cpu;
5156
5157 for_each_tracing_cpu(cpu)
5158 per_cpu_ptr(buf->data, cpu)->entries = val;
5159 }
5160
5161 #ifdef CONFIG_TRACER_MAX_TRACE
5162 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5163 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5164 struct trace_buffer *size_buf, int cpu_id)
5165 {
5166 int cpu, ret = 0;
5167
5168 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5169 for_each_tracing_cpu(cpu) {
5170 ret = ring_buffer_resize(trace_buf->buffer,
5171 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5172 if (ret < 0)
5173 break;
5174 per_cpu_ptr(trace_buf->data, cpu)->entries =
5175 per_cpu_ptr(size_buf->data, cpu)->entries;
5176 }
5177 } else {
5178 ret = ring_buffer_resize(trace_buf->buffer,
5179 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5180 if (ret == 0)
5181 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5182 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5183 }
5184
5185 return ret;
5186 }
5187 #endif /* CONFIG_TRACER_MAX_TRACE */
5188
5189 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5190 unsigned long size, int cpu)
5191 {
5192 int ret;
5193
5194 /*
5195 * If kernel or user changes the size of the ring buffer
5196 * we use the size that was given, and we can forget about
5197 * expanding it later.
5198 */
5199 ring_buffer_expanded = true;
5200
5201 /* May be called before buffers are initialized */
5202 if (!tr->trace_buffer.buffer)
5203 return 0;
5204
5205 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5206 if (ret < 0)
5207 return ret;
5208
5209 #ifdef CONFIG_TRACER_MAX_TRACE
5210 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5211 !tr->current_trace->use_max_tr)
5212 goto out;
5213
5214 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5215 if (ret < 0) {
5216 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5217 &tr->trace_buffer, cpu);
5218 if (r < 0) {
5219 /*
5220 * AARGH! We are left with different
5221 * size max buffer!!!!
5222 * The max buffer is our "snapshot" buffer.
5223 * When a tracer needs a snapshot (one of the
5224 * latency tracers), it swaps the max buffer
5225 * with the saved snapshot. We succeeded in
5226 * updating the size of the main buffer, but failed to
5227 * update the size of the max buffer. But when we tried
5228 * to reset the main buffer to the original size, we
5229 * failed there too. This is very unlikely to
5230 * happen, but if it does, warn and kill all
5231 * tracing.
5232 */
5233 WARN_ON(1);
5234 tracing_disabled = 1;
5235 }
5236 return ret;
5237 }
5238
5239 if (cpu == RING_BUFFER_ALL_CPUS)
5240 set_buffer_entries(&tr->max_buffer, size);
5241 else
5242 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5243
5244 out:
5245 #endif /* CONFIG_TRACER_MAX_TRACE */
5246
5247 if (cpu == RING_BUFFER_ALL_CPUS)
5248 set_buffer_entries(&tr->trace_buffer, size);
5249 else
5250 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5251
5252 return ret;
5253 }
5254
5255 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5256 unsigned long size, int cpu_id)
5257 {
5258 int ret = size;
5259
5260 mutex_lock(&trace_types_lock);
5261
5262 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5263 /* make sure this cpu is enabled in the mask */
5264 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5265 ret = -EINVAL;
5266 goto out;
5267 }
5268 }
5269
5270 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5271 if (ret < 0)
5272 ret = -ENOMEM;
5273
5274 out:
5275 mutex_unlock(&trace_types_lock);
5276
5277 return ret;
5278 }
5279
5280
5281 /**
5282 * tracing_update_buffers - used by tracing facility to expand ring buffers
5283 *
5284 * To save memory when tracing is never used on a system that has it
5285 * configured in, the ring buffers are set to a minimum size. But once
5286 * a user starts to use the tracing facility, they need to grow
5287 * to their default size.
5288 *
5289 * This function is to be called when a tracer is about to be used.
5290 */
5291 int tracing_update_buffers(void)
5292 {
5293 int ret = 0;
5294
5295 mutex_lock(&trace_types_lock);
5296 if (!ring_buffer_expanded)
5297 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5298 RING_BUFFER_ALL_CPUS);
5299 mutex_unlock(&trace_types_lock);
5300
5301 return ret;
5302 }
5303
5304 struct trace_option_dentry;
5305
5306 static void
5307 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5308
5309 /*
5310 * Used to clear out the tracer before deletion of an instance.
5311 * Must have trace_types_lock held.
5312 */
5313 static void tracing_set_nop(struct trace_array *tr)
5314 {
5315 if (tr->current_trace == &nop_trace)
5316 return;
5317
5318 tr->current_trace->enabled--;
5319
5320 if (tr->current_trace->reset)
5321 tr->current_trace->reset(tr);
5322
5323 tr->current_trace = &nop_trace;
5324 }
5325
5326 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5327 {
5328 /* Only enable if the directory has been created already. */
5329 if (!tr->dir)
5330 return;
5331
5332 create_trace_option_files(tr, t);
5333 }
5334
5335 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5336 {
5337 struct tracer *t;
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339 bool had_max_tr;
5340 #endif
5341 int ret = 0;
5342
5343 mutex_lock(&trace_types_lock);
5344
5345 if (!ring_buffer_expanded) {
5346 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5347 RING_BUFFER_ALL_CPUS);
5348 if (ret < 0)
5349 goto out;
5350 ret = 0;
5351 }
5352
5353 for (t = trace_types; t; t = t->next) {
5354 if (strcmp(t->name, buf) == 0)
5355 break;
5356 }
5357 if (!t) {
5358 ret = -EINVAL;
5359 goto out;
5360 }
5361 if (t == tr->current_trace)
5362 goto out;
5363
5364 /* Some tracers are only allowed for the top level buffer */
5365 if (!trace_ok_for_array(t, tr)) {
5366 ret = -EINVAL;
5367 goto out;
5368 }
5369
5370 /* If trace pipe files are being read, we can't change the tracer */
5371 if (tr->current_trace->ref) {
5372 ret = -EBUSY;
5373 goto out;
5374 }
5375
5376 trace_branch_disable();
5377
5378 tr->current_trace->enabled--;
5379
5380 if (tr->current_trace->reset)
5381 tr->current_trace->reset(tr);
5382
5383 /* Current trace needs to be nop_trace before synchronize_sched */
5384 tr->current_trace = &nop_trace;
5385
5386 #ifdef CONFIG_TRACER_MAX_TRACE
5387 had_max_tr = tr->allocated_snapshot;
5388
5389 if (had_max_tr && !t->use_max_tr) {
5390 /*
5391 * We need to make sure that the update_max_tr sees that
5392 * current_trace changed to nop_trace to keep it from
5393 * swapping the buffers after we resize it.
5394 * The update_max_tr is called with interrupts disabled,
5395 * so a synchronize_sched() is sufficient.
5396 */
5397 synchronize_sched();
5398 free_snapshot(tr);
5399 }
5400 #endif
5401
5402 #ifdef CONFIG_TRACER_MAX_TRACE
5403 if (t->use_max_tr && !had_max_tr) {
5404 ret = alloc_snapshot(tr);
5405 if (ret < 0)
5406 goto out;
5407 }
5408 #endif
5409
5410 if (t->init) {
5411 ret = tracer_init(t, tr);
5412 if (ret)
5413 goto out;
5414 }
5415
5416 tr->current_trace = t;
5417 tr->current_trace->enabled++;
5418 trace_branch_enable(tr);
5419 out:
5420 mutex_unlock(&trace_types_lock);
5421
5422 return ret;
5423 }
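/*
 * Illustrative usage (assumed tracer names): the current tracer is
 * switched by writing its name into "current_tracer", e.g.
 *
 *     # echo function_graph > current_tracer
 *     # echo nop > current_tracer
 *
 * The write handler below strips trailing whitespace and then calls
 * tracing_set_tracer().
 */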
5424
5425 static ssize_t
5426 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5427 size_t cnt, loff_t *ppos)
5428 {
5429 struct trace_array *tr = filp->private_data;
5430 char buf[MAX_TRACER_SIZE+1];
5431 int i;
5432 size_t ret;
5433 int err;
5434
5435 ret = cnt;
5436
5437 if (cnt > MAX_TRACER_SIZE)
5438 cnt = MAX_TRACER_SIZE;
5439
5440 if (copy_from_user(buf, ubuf, cnt))
5441 return -EFAULT;
5442
5443 buf[cnt] = 0;
5444
5445 /* strip ending whitespace. */
5446 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5447 buf[i] = 0;
5448
5449 err = tracing_set_tracer(tr, buf);
5450 if (err)
5451 return err;
5452
5453 *ppos += ret;
5454
5455 return ret;
5456 }
5457
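/*
 * Helpers for latency-style values: the files expose microseconds to
 * user space, while the backing variables hold nanoseconds, hence the
 * nsecs_to_usecs() on read and the "* 1000" on write below.
 */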
5458 static ssize_t
5459 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5460 size_t cnt, loff_t *ppos)
5461 {
5462 char buf[64];
5463 int r;
5464
5465 r = snprintf(buf, sizeof(buf), "%ld\n",
5466 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5467 if (r > sizeof(buf))
5468 r = sizeof(buf);
5469 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5470 }
5471
5472 static ssize_t
5473 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5474 size_t cnt, loff_t *ppos)
5475 {
5476 unsigned long val;
5477 int ret;
5478
5479 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5480 if (ret)
5481 return ret;
5482
5483 *ptr = val * 1000;
5484
5485 return cnt;
5486 }
5487
5488 static ssize_t
5489 tracing_thresh_read(struct file *filp, char __user *ubuf,
5490 size_t cnt, loff_t *ppos)
5491 {
5492 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5493 }
5494
5495 static ssize_t
5496 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5497 size_t cnt, loff_t *ppos)
5498 {
5499 struct trace_array *tr = filp->private_data;
5500 int ret;
5501
5502 mutex_lock(&trace_types_lock);
5503 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5504 if (ret < 0)
5505 goto out;
5506
5507 if (tr->current_trace->update_thresh) {
5508 ret = tr->current_trace->update_thresh(tr);
5509 if (ret < 0)
5510 goto out;
5511 }
5512
5513 ret = cnt;
5514 out:
5515 mutex_unlock(&trace_types_lock);
5516
5517 return ret;
5518 }
5519
5520 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5521
5522 static ssize_t
5523 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5524 size_t cnt, loff_t *ppos)
5525 {
5526 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5527 }
5528
5529 static ssize_t
5530 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5531 size_t cnt, loff_t *ppos)
5532 {
5533 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5534 }
5535
5536 #endif
5537
5538 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5539 {
5540 struct trace_array *tr = inode->i_private;
5541 struct trace_iterator *iter;
5542 int ret = 0;
5543
5544 if (tracing_disabled)
5545 return -ENODEV;
5546
5547 if (trace_array_get(tr) < 0)
5548 return -ENODEV;
5549
5550 mutex_lock(&trace_types_lock);
5551
5552 /* create a buffer to store the information to pass to userspace */
5553 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5554 if (!iter) {
5555 ret = -ENOMEM;
5556 __trace_array_put(tr);
5557 goto out;
5558 }
5559
5560 trace_seq_init(&iter->seq);
5561 iter->trace = tr->current_trace;
5562
5563 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5564 ret = -ENOMEM;
5565 goto fail;
5566 }
5567
5568 /* trace pipe does not show start of buffer */
5569 cpumask_setall(iter->started);
5570
5571 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5572 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5573
5574 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5575 if (trace_clocks[tr->clock_id].in_ns)
5576 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5577
5578 iter->tr = tr;
5579 iter->trace_buffer = &tr->trace_buffer;
5580 iter->cpu_file = tracing_get_cpu(inode);
5581 mutex_init(&iter->mutex);
5582 filp->private_data = iter;
5583
5584 if (iter->trace->pipe_open)
5585 iter->trace->pipe_open(iter);
5586
5587 nonseekable_open(inode, filp);
5588
5589 tr->current_trace->ref++;
5590 out:
5591 mutex_unlock(&trace_types_lock);
5592 return ret;
5593
5594 fail:
5595 /* iter->trace points at the registered tracer itself; it must not be freed */
5596 kfree(iter);
5597 __trace_array_put(tr);
5598 mutex_unlock(&trace_types_lock);
5599 return ret;
5600 }
5601
5602 static int tracing_release_pipe(struct inode *inode, struct file *file)
5603 {
5604 struct trace_iterator *iter = file->private_data;
5605 struct trace_array *tr = inode->i_private;
5606
5607 mutex_lock(&trace_types_lock);
5608
5609 tr->current_trace->ref--;
5610
5611 if (iter->trace->pipe_close)
5612 iter->trace->pipe_close(iter);
5613
5614 mutex_unlock(&trace_types_lock);
5615
5616 free_cpumask_var(iter->started);
5617 mutex_destroy(&iter->mutex);
5618 kfree(iter);
5619
5620 trace_array_put(tr);
5621
5622 return 0;
5623 }
5624
5625 static unsigned int
5626 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5627 {
5628 struct trace_array *tr = iter->tr;
5629
5630 /* Iterators are static, they should be filled or empty */
5631 if (trace_buffer_iter(iter, iter->cpu_file))
5632 return POLLIN | POLLRDNORM;
5633
5634 if (tr->trace_flags & TRACE_ITER_BLOCK)
5635 /*
5636 * Always select as readable when in blocking mode
5637 */
5638 return POLLIN | POLLRDNORM;
5639 else
5640 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5641 filp, poll_table);
5642 }
5643
5644 static unsigned int
5645 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5646 {
5647 struct trace_iterator *iter = filp->private_data;
5648
5649 return trace_poll(iter, filp, poll_table);
5650 }
5651
5652 /* Must be called with iter->mutex held. */
5653 static int tracing_wait_pipe(struct file *filp)
5654 {
5655 struct trace_iterator *iter = filp->private_data;
5656 int ret;
5657
5658 while (trace_empty(iter)) {
5659
5660 if ((filp->f_flags & O_NONBLOCK)) {
5661 return -EAGAIN;
5662 }
5663
5664 /*
5665 * We only return (EOF) once tracing is disabled and we have
5666 * already read something; if tracing is disabled but nothing
5667 * has been read yet, keep blocking. This allows a user to cat
5668 * this file, then enable tracing. But after we have read
5669 * something, we give an EOF when tracing is again disabled.
5670 *
5671 * iter->pos will be 0 if we haven't read anything.
5672 */
5673 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5674 break;
5675
5676 mutex_unlock(&iter->mutex);
5677
5678 ret = wait_on_pipe(iter, false);
5679
5680 mutex_lock(&iter->mutex);
5681
5682 if (ret)
5683 return ret;
5684 }
5685
5686 return 1;
5687 }
5688
5689 /*
5690 * Consumer reader.
5691 */
5692 static ssize_t
5693 tracing_read_pipe(struct file *filp, char __user *ubuf,
5694 size_t cnt, loff_t *ppos)
5695 {
5696 struct trace_iterator *iter = filp->private_data;
5697 ssize_t sret;
5698
5699 /*
5700 * Avoid more than one consumer on a single file descriptor.
5701 * This is just a matter of trace coherency; the ring buffer itself
5702 * is protected.
5703 */
5704 mutex_lock(&iter->mutex);
5705
5706 /* return any leftover data */
5707 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5708 if (sret != -EBUSY)
5709 goto out;
5710
5711 trace_seq_init(&iter->seq);
5712
5713 if (iter->trace->read) {
5714 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5715 if (sret)
5716 goto out;
5717 }
5718
5719 waitagain:
5720 sret = tracing_wait_pipe(filp);
5721 if (sret <= 0)
5722 goto out;
5723
5724 /* stop when tracing is finished */
5725 if (trace_empty(iter)) {
5726 sret = 0;
5727 goto out;
5728 }
5729
5730 if (cnt >= PAGE_SIZE)
5731 cnt = PAGE_SIZE - 1;
5732
5733 /* reset all but tr, trace, and overruns */
5734 memset(&iter->seq, 0,
5735 sizeof(struct trace_iterator) -
5736 offsetof(struct trace_iterator, seq));
5737 cpumask_clear(iter->started);
5738 iter->pos = -1;
5739
5740 trace_event_read_lock();
5741 trace_access_lock(iter->cpu_file);
5742 while (trace_find_next_entry_inc(iter) != NULL) {
5743 enum print_line_t ret;
5744 int save_len = iter->seq.seq.len;
5745
5746 ret = print_trace_line(iter);
5747 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5748 /* don't print partial lines */
5749 iter->seq.seq.len = save_len;
5750 break;
5751 }
5752 if (ret != TRACE_TYPE_NO_CONSUME)
5753 trace_consume(iter);
5754
5755 if (trace_seq_used(&iter->seq) >= cnt)
5756 break;
5757
5758 /*
5759 * Setting the full flag means we reached the trace_seq buffer
5760 * size and we should leave by partial output condition above.
5761 * One of the trace_seq_* functions is not used properly.
5762 */
5763 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5764 iter->ent->type);
5765 }
5766 trace_access_unlock(iter->cpu_file);
5767 trace_event_read_unlock();
5768
5769 /* Now copy what we have to the user */
5770 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5771 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5772 trace_seq_init(&iter->seq);
5773
5774 /*
5775 * If there was nothing to send to user, in spite of consuming trace
5776 * entries, go back to wait for more entries.
5777 */
5778 if (sret == -EBUSY)
5779 goto waitagain;
5780
5781 out:
5782 mutex_unlock(&iter->mutex);
5783
5784 return sret;
5785 }
5786
5787 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5788 unsigned int idx)
5789 {
5790 __free_page(spd->pages[idx]);
5791 }
5792
5793 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5794 .can_merge = 0,
5795 .confirm = generic_pipe_buf_confirm,
5796 .release = generic_pipe_buf_release,
5797 .steal = generic_pipe_buf_steal,
5798 .get = generic_pipe_buf_get,
5799 };
5800
5801 static size_t
5802 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5803 {
5804 size_t count;
5805 int save_len;
5806 int ret;
5807
5808 /* Seq buffer is page-sized, exactly what we need. */
5809 for (;;) {
5810 save_len = iter->seq.seq.len;
5811 ret = print_trace_line(iter);
5812
5813 if (trace_seq_has_overflowed(&iter->seq)) {
5814 iter->seq.seq.len = save_len;
5815 break;
5816 }
5817
5818 /*
5819 * This should not be hit, because it should only
5820 * be set if the iter->seq overflowed. But check it
5821 * anyway to be safe.
5822 */
5823 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5824 iter->seq.seq.len = save_len;
5825 break;
5826 }
5827
5828 count = trace_seq_used(&iter->seq) - save_len;
5829 if (rem < count) {
5830 rem = 0;
5831 iter->seq.seq.len = save_len;
5832 break;
5833 }
5834
5835 if (ret != TRACE_TYPE_NO_CONSUME)
5836 trace_consume(iter);
5837 rem -= count;
5838 if (!trace_find_next_entry_inc(iter)) {
5839 rem = 0;
5840 iter->ent = NULL;
5841 break;
5842 }
5843 }
5844
5845 return rem;
5846 }
5847
5848 static ssize_t tracing_splice_read_pipe(struct file *filp,
5849 loff_t *ppos,
5850 struct pipe_inode_info *pipe,
5851 size_t len,
5852 unsigned int flags)
5853 {
5854 struct page *pages_def[PIPE_DEF_BUFFERS];
5855 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5856 struct trace_iterator *iter = filp->private_data;
5857 struct splice_pipe_desc spd = {
5858 .pages = pages_def,
5859 .partial = partial_def,
5860 .nr_pages = 0, /* This gets updated below. */
5861 .nr_pages_max = PIPE_DEF_BUFFERS,
5862 .ops = &tracing_pipe_buf_ops,
5863 .spd_release = tracing_spd_release_pipe,
5864 };
5865 ssize_t ret;
5866 size_t rem;
5867 unsigned int i;
5868
5869 if (splice_grow_spd(pipe, &spd))
5870 return -ENOMEM;
5871
5872 mutex_lock(&iter->mutex);
5873
5874 if (iter->trace->splice_read) {
5875 ret = iter->trace->splice_read(iter, filp,
5876 ppos, pipe, len, flags);
5877 if (ret)
5878 goto out_err;
5879 }
5880
5881 ret = tracing_wait_pipe(filp);
5882 if (ret <= 0)
5883 goto out_err;
5884
5885 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5886 ret = -EFAULT;
5887 goto out_err;
5888 }
5889
5890 trace_event_read_lock();
5891 trace_access_lock(iter->cpu_file);
5892
5893 /* Fill as many pages as possible. */
5894 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5895 spd.pages[i] = alloc_page(GFP_KERNEL);
5896 if (!spd.pages[i])
5897 break;
5898
5899 rem = tracing_fill_pipe_page(rem, iter);
5900
5901 /* Copy the data into the page, so we can start over. */
5902 ret = trace_seq_to_buffer(&iter->seq,
5903 page_address(spd.pages[i]),
5904 trace_seq_used(&iter->seq));
5905 if (ret < 0) {
5906 __free_page(spd.pages[i]);
5907 break;
5908 }
5909 spd.partial[i].offset = 0;
5910 spd.partial[i].len = trace_seq_used(&iter->seq);
5911
5912 trace_seq_init(&iter->seq);
5913 }
5914
5915 trace_access_unlock(iter->cpu_file);
5916 trace_event_read_unlock();
5917 mutex_unlock(&iter->mutex);
5918
5919 spd.nr_pages = i;
5920
5921 if (i)
5922 ret = splice_to_pipe(pipe, &spd);
5923 else
5924 ret = 0;
5925 out:
5926 splice_shrink_spd(&spd);
5927 return ret;
5928
5929 out_err:
5930 mutex_unlock(&iter->mutex);
5931 goto out;
5932 }
5933
5934 static ssize_t
5935 tracing_entries_read(struct file *filp, char __user *ubuf,
5936 size_t cnt, loff_t *ppos)
5937 {
5938 struct inode *inode = file_inode(filp);
5939 struct trace_array *tr = inode->i_private;
5940 int cpu = tracing_get_cpu(inode);
5941 char buf[64];
5942 int r = 0;
5943 ssize_t ret;
5944
5945 mutex_lock(&trace_types_lock);
5946
5947 if (cpu == RING_BUFFER_ALL_CPUS) {
5948 int cpu, buf_size_same;
5949 unsigned long size;
5950
5951 size = 0;
5952 buf_size_same = 1;
5953 /* check if all cpu sizes are same */
5954 for_each_tracing_cpu(cpu) {
5955 /* fill in the size from first enabled cpu */
5956 if (size == 0)
5957 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5958 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5959 buf_size_same = 0;
5960 break;
5961 }
5962 }
5963
5964 if (buf_size_same) {
5965 if (!ring_buffer_expanded)
5966 r = sprintf(buf, "%lu (expanded: %lu)\n",
5967 size >> 10,
5968 trace_buf_size >> 10);
5969 else
5970 r = sprintf(buf, "%lu\n", size >> 10);
5971 } else
5972 r = sprintf(buf, "X\n");
5973 } else
5974 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5975
5976 mutex_unlock(&trace_types_lock);
5977
5978 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5979 return ret;
5980 }
5981
5982 static ssize_t
5983 tracing_entries_write(struct file *filp, const char __user *ubuf,
5984 size_t cnt, loff_t *ppos)
5985 {
5986 struct inode *inode = file_inode(filp);
5987 struct trace_array *tr = inode->i_private;
5988 unsigned long val;
5989 int ret;
5990
5991 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5992 if (ret)
5993 return ret;
5994
5995 /* must have at least 1 entry */
5996 if (!val)
5997 return -EINVAL;
5998
5999 /* value is in KB */
6000 val <<= 10;
6001 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6002 if (ret < 0)
6003 return ret;
6004
6005 *ppos += cnt;
6006
6007 return cnt;
6008 }
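/*
 * Illustrative usage (an assumed example): buffer_size_kb takes a
 * per-CPU size in kilobytes, e.g.
 *
 *     # echo 4096 > buffer_size_kb
 *
 * which resizes each CPU buffer to 4 MB via the
 * tracing_resize_ring_buffer() call above.
 */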
6009
6010 static ssize_t
6011 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6012 size_t cnt, loff_t *ppos)
6013 {
6014 struct trace_array *tr = filp->private_data;
6015 char buf[64];
6016 int r, cpu;
6017 unsigned long size = 0, expanded_size = 0;
6018
6019 mutex_lock(&trace_types_lock);
6020 for_each_tracing_cpu(cpu) {
6021 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6022 if (!ring_buffer_expanded)
6023 expanded_size += trace_buf_size >> 10;
6024 }
6025 if (ring_buffer_expanded)
6026 r = sprintf(buf, "%lu\n", size);
6027 else
6028 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6029 mutex_unlock(&trace_types_lock);
6030
6031 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6032 }
6033
6034 static ssize_t
6035 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6036 size_t cnt, loff_t *ppos)
6037 {
6038 /*
6039 	 * There is no need to read what the user has written; this function
6040 	 * exists just to make sure that there is no error when "echo" is used.
6041 */
6042
6043 *ppos += cnt;
6044
6045 return cnt;
6046 }
6047
6048 static int
6049 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6050 {
6051 struct trace_array *tr = inode->i_private;
6052
6053 	/* disable tracing? */
6054 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6055 tracer_tracing_off(tr);
6056 /* resize the ring buffer to 0 */
6057 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6058
6059 trace_array_put(tr);
6060
6061 return 0;
6062 }
6063
6064 static ssize_t
6065 tracing_mark_write(struct file *filp, const char __user *ubuf,
6066 size_t cnt, loff_t *fpos)
6067 {
6068 struct trace_array *tr = filp->private_data;
6069 struct ring_buffer_event *event;
6070 struct ring_buffer *buffer;
6071 struct print_entry *entry;
6072 unsigned long irq_flags;
6073 const char faulted[] = "<faulted>";
6074 ssize_t written;
6075 int size;
6076 int len;
6077
6078 /* Used in tracing_mark_raw_write() as well */
6079 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6080
6081 if (tracing_disabled)
6082 return -EINVAL;
6083
6084 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6085 return -EINVAL;
6086
6087 if (cnt > TRACE_BUF_SIZE)
6088 cnt = TRACE_BUF_SIZE;
6089
6090 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6091
6092 local_save_flags(irq_flags);
6093 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6094
6095 /* If less than "<faulted>", then make sure we can still add that */
6096 if (cnt < FAULTED_SIZE)
6097 size += FAULTED_SIZE - cnt;
6098
6099 buffer = tr->trace_buffer.buffer;
6100 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6101 irq_flags, preempt_count());
6102 if (unlikely(!event))
6103 /* Ring buffer disabled, return as if not open for write */
6104 return -EBADF;
6105
6106 entry = ring_buffer_event_data(event);
6107 entry->ip = _THIS_IP_;
6108
6109 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6110 if (len) {
6111 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6112 cnt = FAULTED_SIZE;
6113 written = -EFAULT;
6114 } else
6115 written = cnt;
6116 len = cnt;
6117
6118 if (entry->buf[cnt - 1] != '\n') {
6119 entry->buf[cnt] = '\n';
6120 entry->buf[cnt + 1] = '\0';
6121 } else
6122 entry->buf[cnt] = '\0';
6123
6124 __buffer_unlock_commit(buffer, event);
6125
6126 if (written > 0)
6127 *fpos += written;
6128
6129 return written;
6130 }
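/*
 * Example: a minimal userspace sketch (the path assumes the usual
 * tracefs mount point) that injects a string into the trace through
 * trace_marker; if the string does not end in a newline, the code
 * above appends one:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "hit checkpoint A", 16);
 *		close(fd);
 *	}
 */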
6131
6132 /* Limit it for now to 3K (including tag) */
6133 #define RAW_DATA_MAX_SIZE (1024*3)
6134
6135 static ssize_t
6136 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6137 size_t cnt, loff_t *fpos)
6138 {
6139 struct trace_array *tr = filp->private_data;
6140 struct ring_buffer_event *event;
6141 struct ring_buffer *buffer;
6142 struct raw_data_entry *entry;
6143 const char faulted[] = "<faulted>";
6144 unsigned long irq_flags;
6145 ssize_t written;
6146 int size;
6147 int len;
6148
6149 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6150
6151 if (tracing_disabled)
6152 return -EINVAL;
6153
6154 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6155 return -EINVAL;
6156
6157 /* The marker must at least have a tag id */
6158 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6159 return -EINVAL;
6160
6161 if (cnt > TRACE_BUF_SIZE)
6162 cnt = TRACE_BUF_SIZE;
6163
6164 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6165
6166 local_save_flags(irq_flags);
6167 size = sizeof(*entry) + cnt;
6168 if (cnt < FAULT_SIZE_ID)
6169 size += FAULT_SIZE_ID - cnt;
6170
6171 buffer = tr->trace_buffer.buffer;
6172 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6173 irq_flags, preempt_count());
6174 if (!event)
6175 /* Ring buffer disabled, return as if not open for write */
6176 return -EBADF;
6177
6178 entry = ring_buffer_event_data(event);
6179
6180 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6181 if (len) {
6182 entry->id = -1;
6183 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6184 written = -EFAULT;
6185 } else
6186 written = cnt;
6187
6188 __buffer_unlock_commit(buffer, event);
6189
6190 if (written > 0)
6191 *fpos += written;
6192
6193 return written;
6194 }
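/*
 * Example: trace_marker_raw expects a binary payload that begins with
 * an unsigned int tag id followed by the raw data. A minimal sketch,
 * where the tag value and payload are arbitrary and fd is assumed to
 * be open on trace_marker_raw:
 *
 *	unsigned int id = 42;
 *	unsigned char buf[sizeof(id) + 8];
 *
 *	memcpy(buf, &id, sizeof(id));
 *	memcpy(buf + sizeof(id), "rawdata0", 8);
 *	write(fd, buf, sizeof(buf));
 */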
6195
6196 static int tracing_clock_show(struct seq_file *m, void *v)
6197 {
6198 struct trace_array *tr = m->private;
6199 int i;
6200
6201 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6202 seq_printf(m,
6203 "%s%s%s%s", i ? " " : "",
6204 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6205 i == tr->clock_id ? "]" : "");
6206 seq_putc(m, '\n');
6207
6208 return 0;
6209 }
6210
6211 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6212 {
6213 int i;
6214
6215 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6216 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6217 break;
6218 }
6219 if (i == ARRAY_SIZE(trace_clocks))
6220 return -EINVAL;
6221
6222 mutex_lock(&trace_types_lock);
6223
6224 tr->clock_id = i;
6225
6226 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6227
6228 /*
6229 * New clock may not be consistent with the previous clock.
6230 * Reset the buffer so that it doesn't have incomparable timestamps.
6231 */
6232 tracing_reset_online_cpus(&tr->trace_buffer);
6233
6234 #ifdef CONFIG_TRACER_MAX_TRACE
6235 if (tr->max_buffer.buffer)
6236 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6237 tracing_reset_online_cpus(&tr->max_buffer);
6238 #endif
6239
6240 mutex_unlock(&trace_types_lock);
6241
6242 return 0;
6243 }
6244
6245 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6246 size_t cnt, loff_t *fpos)
6247 {
6248 struct seq_file *m = filp->private_data;
6249 struct trace_array *tr = m->private;
6250 char buf[64];
6251 const char *clockstr;
6252 int ret;
6253
6254 if (cnt >= sizeof(buf))
6255 return -EINVAL;
6256
6257 if (copy_from_user(buf, ubuf, cnt))
6258 return -EFAULT;
6259
6260 buf[cnt] = 0;
6261
6262 clockstr = strstrip(buf);
6263
6264 ret = tracing_set_clock(tr, clockstr);
6265 if (ret)
6266 return ret;
6267
6268 *fpos += cnt;
6269
6270 return cnt;
6271 }
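/*
 * Example: selecting a different trace clock from userspace; the name
 * written must match one of the entries listed when reading the
 * trace_clock file (a sketch only, the path assumes the usual tracefs
 * mount point). Note that, as described above, switching clocks resets
 * the existing buffers so old and new timestamps are never mixed:
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "mono", 4);
 *		close(fd);
 *	}
 */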
6272
6273 static int tracing_clock_open(struct inode *inode, struct file *file)
6274 {
6275 struct trace_array *tr = inode->i_private;
6276 int ret;
6277
6278 if (tracing_disabled)
6279 return -ENODEV;
6280
6281 if (trace_array_get(tr))
6282 return -ENODEV;
6283
6284 ret = single_open(file, tracing_clock_show, inode->i_private);
6285 if (ret < 0)
6286 trace_array_put(tr);
6287
6288 return ret;
6289 }
6290
6291 struct ftrace_buffer_info {
6292 struct trace_iterator iter;
6293 void *spare;
6294 unsigned int spare_cpu;
6295 unsigned int read;
6296 };
6297
6298 #ifdef CONFIG_TRACER_SNAPSHOT
6299 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6300 {
6301 struct trace_array *tr = inode->i_private;
6302 struct trace_iterator *iter;
6303 struct seq_file *m;
6304 int ret = 0;
6305
6306 if (trace_array_get(tr) < 0)
6307 return -ENODEV;
6308
6309 if (file->f_mode & FMODE_READ) {
6310 iter = __tracing_open(inode, file, true);
6311 if (IS_ERR(iter))
6312 ret = PTR_ERR(iter);
6313 } else {
6314 /* Writes still need the seq_file to hold the private data */
6315 ret = -ENOMEM;
6316 m = kzalloc(sizeof(*m), GFP_KERNEL);
6317 if (!m)
6318 goto out;
6319 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6320 if (!iter) {
6321 kfree(m);
6322 goto out;
6323 }
6324 ret = 0;
6325
6326 iter->tr = tr;
6327 iter->trace_buffer = &tr->max_buffer;
6328 iter->cpu_file = tracing_get_cpu(inode);
6329 m->private = iter;
6330 file->private_data = m;
6331 }
6332 out:
6333 if (ret < 0)
6334 trace_array_put(tr);
6335
6336 return ret;
6337 }
6338
6339 static ssize_t
6340 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6341 loff_t *ppos)
6342 {
6343 struct seq_file *m = filp->private_data;
6344 struct trace_iterator *iter = m->private;
6345 struct trace_array *tr = iter->tr;
6346 unsigned long val;
6347 int ret;
6348
6349 ret = tracing_update_buffers();
6350 if (ret < 0)
6351 return ret;
6352
6353 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6354 if (ret)
6355 return ret;
6356
6357 mutex_lock(&trace_types_lock);
6358
6359 if (tr->current_trace->use_max_tr) {
6360 ret = -EBUSY;
6361 goto out;
6362 }
6363
6364 switch (val) {
6365 case 0:
6366 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6367 ret = -EINVAL;
6368 break;
6369 }
6370 if (tr->allocated_snapshot)
6371 free_snapshot(tr);
6372 break;
6373 case 1:
6374 /* Only allow per-cpu swap if the ring buffer supports it */
6375 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6376 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6377 ret = -EINVAL;
6378 break;
6379 }
6380 #endif
6381 if (!tr->allocated_snapshot) {
6382 ret = alloc_snapshot(tr);
6383 if (ret < 0)
6384 break;
6385 }
6386 local_irq_disable();
6387 /* Now, we're going to swap */
6388 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6389 update_max_tr(tr, current, smp_processor_id());
6390 else
6391 update_max_tr_single(tr, current, iter->cpu_file);
6392 local_irq_enable();
6393 break;
6394 default:
6395 if (tr->allocated_snapshot) {
6396 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6397 tracing_reset_online_cpus(&tr->max_buffer);
6398 else
6399 tracing_reset(&tr->max_buffer, iter->cpu_file);
6400 }
6401 break;
6402 }
6403
6404 if (ret >= 0) {
6405 *ppos += cnt;
6406 ret = cnt;
6407 }
6408 out:
6409 mutex_unlock(&trace_types_lock);
6410 return ret;
6411 }
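/*
 * Example: the value written to the snapshot file selects the action
 * implemented above: 0 frees the snapshot buffer, 1 allocates it if
 * needed and swaps it with the live buffer, and any other value simply
 * clears the snapshot. A sketch of taking a snapshot from userspace
 * (the path assumes the usual tracefs mount point):
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */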
6412
6413 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6414 {
6415 struct seq_file *m = file->private_data;
6416 int ret;
6417
6418 ret = tracing_release(inode, file);
6419
6420 if (file->f_mode & FMODE_READ)
6421 return ret;
6422
6423 /* If write only, the seq_file is just a stub */
6424 if (m)
6425 kfree(m->private);
6426 kfree(m);
6427
6428 return 0;
6429 }
6430
6431 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6432 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6433 size_t count, loff_t *ppos);
6434 static int tracing_buffers_release(struct inode *inode, struct file *file);
6435 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6436 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6437
6438 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6439 {
6440 struct ftrace_buffer_info *info;
6441 int ret;
6442
6443 ret = tracing_buffers_open(inode, filp);
6444 if (ret < 0)
6445 return ret;
6446
6447 info = filp->private_data;
6448
6449 if (info->iter.trace->use_max_tr) {
6450 tracing_buffers_release(inode, filp);
6451 return -EBUSY;
6452 }
6453
6454 info->iter.snapshot = true;
6455 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6456
6457 return ret;
6458 }
6459
6460 #endif /* CONFIG_TRACER_SNAPSHOT */
6461
6462
6463 static const struct file_operations tracing_thresh_fops = {
6464 .open = tracing_open_generic,
6465 .read = tracing_thresh_read,
6466 .write = tracing_thresh_write,
6467 .llseek = generic_file_llseek,
6468 };
6469
6470 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6471 static const struct file_operations tracing_max_lat_fops = {
6472 .open = tracing_open_generic,
6473 .read = tracing_max_lat_read,
6474 .write = tracing_max_lat_write,
6475 .llseek = generic_file_llseek,
6476 };
6477 #endif
6478
6479 static const struct file_operations set_tracer_fops = {
6480 .open = tracing_open_generic,
6481 .read = tracing_set_trace_read,
6482 .write = tracing_set_trace_write,
6483 .llseek = generic_file_llseek,
6484 };
6485
6486 static const struct file_operations tracing_pipe_fops = {
6487 .open = tracing_open_pipe,
6488 .poll = tracing_poll_pipe,
6489 .read = tracing_read_pipe,
6490 .splice_read = tracing_splice_read_pipe,
6491 .release = tracing_release_pipe,
6492 .llseek = no_llseek,
6493 };
6494
6495 static const struct file_operations tracing_entries_fops = {
6496 .open = tracing_open_generic_tr,
6497 .read = tracing_entries_read,
6498 .write = tracing_entries_write,
6499 .llseek = generic_file_llseek,
6500 .release = tracing_release_generic_tr,
6501 };
6502
6503 static const struct file_operations tracing_total_entries_fops = {
6504 .open = tracing_open_generic_tr,
6505 .read = tracing_total_entries_read,
6506 .llseek = generic_file_llseek,
6507 .release = tracing_release_generic_tr,
6508 };
6509
6510 static const struct file_operations tracing_free_buffer_fops = {
6511 .open = tracing_open_generic_tr,
6512 .write = tracing_free_buffer_write,
6513 .release = tracing_free_buffer_release,
6514 };
6515
6516 static const struct file_operations tracing_mark_fops = {
6517 .open = tracing_open_generic_tr,
6518 .write = tracing_mark_write,
6519 .llseek = generic_file_llseek,
6520 .release = tracing_release_generic_tr,
6521 };
6522
6523 static const struct file_operations tracing_mark_raw_fops = {
6524 .open = tracing_open_generic_tr,
6525 .write = tracing_mark_raw_write,
6526 .llseek = generic_file_llseek,
6527 .release = tracing_release_generic_tr,
6528 };
6529
6530 static const struct file_operations trace_clock_fops = {
6531 .open = tracing_clock_open,
6532 .read = seq_read,
6533 .llseek = seq_lseek,
6534 .release = tracing_single_release_tr,
6535 .write = tracing_clock_write,
6536 };
6537
6538 #ifdef CONFIG_TRACER_SNAPSHOT
6539 static const struct file_operations snapshot_fops = {
6540 .open = tracing_snapshot_open,
6541 .read = seq_read,
6542 .write = tracing_snapshot_write,
6543 .llseek = tracing_lseek,
6544 .release = tracing_snapshot_release,
6545 };
6546
6547 static const struct file_operations snapshot_raw_fops = {
6548 .open = snapshot_raw_open,
6549 .read = tracing_buffers_read,
6550 .release = tracing_buffers_release,
6551 .splice_read = tracing_buffers_splice_read,
6552 .llseek = no_llseek,
6553 };
6554
6555 #endif /* CONFIG_TRACER_SNAPSHOT */
6556
6557 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6558 {
6559 struct trace_array *tr = inode->i_private;
6560 struct ftrace_buffer_info *info;
6561 int ret;
6562
6563 if (tracing_disabled)
6564 return -ENODEV;
6565
6566 if (trace_array_get(tr) < 0)
6567 return -ENODEV;
6568
6569 info = kzalloc(sizeof(*info), GFP_KERNEL);
6570 if (!info) {
6571 trace_array_put(tr);
6572 return -ENOMEM;
6573 }
6574
6575 mutex_lock(&trace_types_lock);
6576
6577 info->iter.tr = tr;
6578 info->iter.cpu_file = tracing_get_cpu(inode);
6579 info->iter.trace = tr->current_trace;
6580 info->iter.trace_buffer = &tr->trace_buffer;
6581 info->spare = NULL;
6582 /* Force reading ring buffer for first read */
6583 info->read = (unsigned int)-1;
6584
6585 filp->private_data = info;
6586
6587 tr->current_trace->ref++;
6588
6589 mutex_unlock(&trace_types_lock);
6590
6591 ret = nonseekable_open(inode, filp);
6592 if (ret < 0)
6593 trace_array_put(tr);
6594
6595 return ret;
6596 }
6597
6598 static unsigned int
6599 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6600 {
6601 struct ftrace_buffer_info *info = filp->private_data;
6602 struct trace_iterator *iter = &info->iter;
6603
6604 return trace_poll(iter, filp, poll_table);
6605 }
6606
6607 static ssize_t
6608 tracing_buffers_read(struct file *filp, char __user *ubuf,
6609 size_t count, loff_t *ppos)
6610 {
6611 struct ftrace_buffer_info *info = filp->private_data;
6612 struct trace_iterator *iter = &info->iter;
6613 ssize_t ret = 0;
6614 ssize_t size;
6615
6616 if (!count)
6617 return 0;
6618
6619 #ifdef CONFIG_TRACER_MAX_TRACE
6620 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6621 return -EBUSY;
6622 #endif
6623
6624 if (!info->spare) {
6625 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6626 iter->cpu_file);
6627 if (IS_ERR(info->spare)) {
6628 ret = PTR_ERR(info->spare);
6629 info->spare = NULL;
6630 } else {
6631 info->spare_cpu = iter->cpu_file;
6632 }
6633 }
6634 if (!info->spare)
6635 return ret;
6636
6637 /* Do we have previous read data to read? */
6638 if (info->read < PAGE_SIZE)
6639 goto read;
6640
6641 again:
6642 trace_access_lock(iter->cpu_file);
6643 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6644 &info->spare,
6645 count,
6646 iter->cpu_file, 0);
6647 trace_access_unlock(iter->cpu_file);
6648
6649 if (ret < 0) {
6650 if (trace_empty(iter)) {
6651 if ((filp->f_flags & O_NONBLOCK))
6652 return -EAGAIN;
6653
6654 ret = wait_on_pipe(iter, false);
6655 if (ret)
6656 return ret;
6657
6658 goto again;
6659 }
6660 return 0;
6661 }
6662
6663 info->read = 0;
6664 read:
6665 size = PAGE_SIZE - info->read;
6666 if (size > count)
6667 size = count;
6668
6669 ret = copy_to_user(ubuf, info->spare + info->read, size);
6670 if (ret == size)
6671 return -EFAULT;
6672
6673 size -= ret;
6674
6675 *ppos += size;
6676 info->read += size;
6677
6678 return size;
6679 }
6680
6681 static int tracing_buffers_release(struct inode *inode, struct file *file)
6682 {
6683 struct ftrace_buffer_info *info = file->private_data;
6684 struct trace_iterator *iter = &info->iter;
6685
6686 mutex_lock(&trace_types_lock);
6687
6688 iter->tr->current_trace->ref--;
6689
6690 __trace_array_put(iter->tr);
6691
6692 if (info->spare)
6693 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6694 info->spare_cpu, info->spare);
6695 kfree(info);
6696
6697 mutex_unlock(&trace_types_lock);
6698
6699 return 0;
6700 }
6701
6702 struct buffer_ref {
6703 struct ring_buffer *buffer;
6704 void *page;
6705 int cpu;
6706 int ref;
6707 };
6708
6709 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6710 struct pipe_buffer *buf)
6711 {
6712 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6713
6714 if (--ref->ref)
6715 return;
6716
6717 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6718 kfree(ref);
6719 buf->private = 0;
6720 }
6721
6722 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6723 struct pipe_buffer *buf)
6724 {
6725 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6726
6727 ref->ref++;
6728 }
6729
6730 /* Pipe buffer operations for a buffer. */
6731 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6732 .can_merge = 0,
6733 .confirm = generic_pipe_buf_confirm,
6734 .release = buffer_pipe_buf_release,
6735 .steal = generic_pipe_buf_steal,
6736 .get = buffer_pipe_buf_get,
6737 };
6738
6739 /*
6740  * Callback from splice_to_pipe(); releases any pages left at the end
6741  * of the spd in case we errored out while filling the pipe.
6742 */
6743 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6744 {
6745 struct buffer_ref *ref =
6746 (struct buffer_ref *)spd->partial[i].private;
6747
6748 if (--ref->ref)
6749 return;
6750
6751 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6752 kfree(ref);
6753 spd->partial[i].private = 0;
6754 }
6755
6756 static ssize_t
6757 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6758 struct pipe_inode_info *pipe, size_t len,
6759 unsigned int flags)
6760 {
6761 struct ftrace_buffer_info *info = file->private_data;
6762 struct trace_iterator *iter = &info->iter;
6763 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6764 struct page *pages_def[PIPE_DEF_BUFFERS];
6765 struct splice_pipe_desc spd = {
6766 .pages = pages_def,
6767 .partial = partial_def,
6768 .nr_pages_max = PIPE_DEF_BUFFERS,
6769 .ops = &buffer_pipe_buf_ops,
6770 .spd_release = buffer_spd_release,
6771 };
6772 struct buffer_ref *ref;
6773 int entries, size, i;
6774 ssize_t ret = 0;
6775
6776 #ifdef CONFIG_TRACER_MAX_TRACE
6777 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6778 return -EBUSY;
6779 #endif
6780
6781 if (*ppos & (PAGE_SIZE - 1))
6782 return -EINVAL;
6783
6784 if (len & (PAGE_SIZE - 1)) {
6785 if (len < PAGE_SIZE)
6786 return -EINVAL;
6787 len &= PAGE_MASK;
6788 }
6789
6790 if (splice_grow_spd(pipe, &spd))
6791 return -ENOMEM;
6792
6793 again:
6794 trace_access_lock(iter->cpu_file);
6795 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6796
6797 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6798 struct page *page;
6799 int r;
6800
6801 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6802 if (!ref) {
6803 ret = -ENOMEM;
6804 break;
6805 }
6806
6807 ref->ref = 1;
6808 ref->buffer = iter->trace_buffer->buffer;
6809 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6810 if (IS_ERR(ref->page)) {
6811 ret = PTR_ERR(ref->page);
6812 ref->page = NULL;
6813 kfree(ref);
6814 break;
6815 }
6816 ref->cpu = iter->cpu_file;
6817
6818 r = ring_buffer_read_page(ref->buffer, &ref->page,
6819 len, iter->cpu_file, 1);
6820 if (r < 0) {
6821 ring_buffer_free_read_page(ref->buffer, ref->cpu,
6822 ref->page);
6823 kfree(ref);
6824 break;
6825 }
6826
6827 /*
6828 		 * Zero out any leftover data, as this is going to
6829 		 * user land.
6830 */
6831 size = ring_buffer_page_len(ref->page);
6832 if (size < PAGE_SIZE)
6833 memset(ref->page + size, 0, PAGE_SIZE - size);
6834
6835 page = virt_to_page(ref->page);
6836
6837 spd.pages[i] = page;
6838 spd.partial[i].len = PAGE_SIZE;
6839 spd.partial[i].offset = 0;
6840 spd.partial[i].private = (unsigned long)ref;
6841 spd.nr_pages++;
6842 *ppos += PAGE_SIZE;
6843
6844 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6845 }
6846
6847 trace_access_unlock(iter->cpu_file);
6848 spd.nr_pages = i;
6849
6850 /* did we read anything? */
6851 if (!spd.nr_pages) {
6852 if (ret)
6853 goto out;
6854
6855 ret = -EAGAIN;
6856 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6857 goto out;
6858
6859 ret = wait_on_pipe(iter, true);
6860 if (ret)
6861 goto out;
6862
6863 goto again;
6864 }
6865
6866 ret = splice_to_pipe(pipe, &spd);
6867 out:
6868 splice_shrink_spd(&spd);
6869
6870 return ret;
6871 }
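/*
 * Example: trace_pipe_raw is consumed in page-sized units, typically
 * via splice(2) so the ring-buffer pages reach their destination
 * without an extra copy. A minimal sketch (error handling omitted,
 * the output file name is only illustrative):
 *
 *	int raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	splice(raw, NULL, pfd[1], NULL, 4096, 0);
 *	splice(pfd[0], NULL, out, NULL, 4096, 0);
 */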
6872
6873 static const struct file_operations tracing_buffers_fops = {
6874 .open = tracing_buffers_open,
6875 .read = tracing_buffers_read,
6876 .poll = tracing_buffers_poll,
6877 .release = tracing_buffers_release,
6878 .splice_read = tracing_buffers_splice_read,
6879 .llseek = no_llseek,
6880 };
6881
6882 static ssize_t
6883 tracing_stats_read(struct file *filp, char __user *ubuf,
6884 size_t count, loff_t *ppos)
6885 {
6886 struct inode *inode = file_inode(filp);
6887 struct trace_array *tr = inode->i_private;
6888 struct trace_buffer *trace_buf = &tr->trace_buffer;
6889 int cpu = tracing_get_cpu(inode);
6890 struct trace_seq *s;
6891 unsigned long cnt;
6892 unsigned long long t;
6893 unsigned long usec_rem;
6894
6895 s = kmalloc(sizeof(*s), GFP_KERNEL);
6896 if (!s)
6897 return -ENOMEM;
6898
6899 trace_seq_init(s);
6900
6901 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6902 trace_seq_printf(s, "entries: %ld\n", cnt);
6903
6904 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6905 trace_seq_printf(s, "overrun: %ld\n", cnt);
6906
6907 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6908 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6909
6910 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6911 trace_seq_printf(s, "bytes: %ld\n", cnt);
6912
6913 if (trace_clocks[tr->clock_id].in_ns) {
6914 /* local or global for trace_clock */
6915 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6916 usec_rem = do_div(t, USEC_PER_SEC);
6917 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6918 t, usec_rem);
6919
6920 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6921 usec_rem = do_div(t, USEC_PER_SEC);
6922 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6923 } else {
6924 /* counter or tsc mode for trace_clock */
6925 trace_seq_printf(s, "oldest event ts: %llu\n",
6926 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6927
6928 trace_seq_printf(s, "now ts: %llu\n",
6929 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6930 }
6931
6932 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6933 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6934
6935 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6936 trace_seq_printf(s, "read events: %ld\n", cnt);
6937
6938 count = simple_read_from_buffer(ubuf, count, ppos,
6939 s->buffer, trace_seq_used(s));
6940
6941 kfree(s);
6942
6943 return count;
6944 }
6945
6946 static const struct file_operations tracing_stats_fops = {
6947 .open = tracing_open_generic_tr,
6948 .read = tracing_stats_read,
6949 .llseek = generic_file_llseek,
6950 .release = tracing_release_generic_tr,
6951 };
6952
6953 #ifdef CONFIG_DYNAMIC_FTRACE
6954
6955 static ssize_t
6956 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6957 size_t cnt, loff_t *ppos)
6958 {
6959 unsigned long *p = filp->private_data;
6960 char buf[64]; /* Not too big for a shallow stack */
6961 int r;
6962
6963 r = scnprintf(buf, 63, "%ld", *p);
6964 buf[r++] = '\n';
6965
6966 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6967 }
6968
6969 static const struct file_operations tracing_dyn_info_fops = {
6970 .open = tracing_open_generic,
6971 .read = tracing_read_dyn_info,
6972 .llseek = generic_file_llseek,
6973 };
6974 #endif /* CONFIG_DYNAMIC_FTRACE */
6975
6976 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6977 static void
6978 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6979 struct trace_array *tr, struct ftrace_probe_ops *ops,
6980 void *data)
6981 {
6982 tracing_snapshot_instance(tr);
6983 }
6984
6985 static void
6986 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6987 struct trace_array *tr, struct ftrace_probe_ops *ops,
6988 void *data)
6989 {
6990 struct ftrace_func_mapper *mapper = data;
6991 long *count = NULL;
6992
6993 if (mapper)
6994 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6995
6996 if (count) {
6997
6998 if (*count <= 0)
6999 return;
7000
7001 (*count)--;
7002 }
7003
7004 tracing_snapshot_instance(tr);
7005 }
7006
7007 static int
7008 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7009 struct ftrace_probe_ops *ops, void *data)
7010 {
7011 struct ftrace_func_mapper *mapper = data;
7012 long *count = NULL;
7013
7014 seq_printf(m, "%ps:", (void *)ip);
7015
7016 seq_puts(m, "snapshot");
7017
7018 if (mapper)
7019 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7020
7021 if (count)
7022 seq_printf(m, ":count=%ld\n", *count);
7023 else
7024 seq_puts(m, ":unlimited\n");
7025
7026 return 0;
7027 }
7028
7029 static int
7030 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7031 unsigned long ip, void *init_data, void **data)
7032 {
7033 struct ftrace_func_mapper *mapper = *data;
7034
7035 if (!mapper) {
7036 mapper = allocate_ftrace_func_mapper();
7037 if (!mapper)
7038 return -ENOMEM;
7039 *data = mapper;
7040 }
7041
7042 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7043 }
7044
7045 static void
7046 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7047 unsigned long ip, void *data)
7048 {
7049 struct ftrace_func_mapper *mapper = data;
7050
7051 if (!ip) {
7052 if (!mapper)
7053 return;
7054 free_ftrace_func_mapper(mapper, NULL);
7055 return;
7056 }
7057
7058 ftrace_func_mapper_remove_ip(mapper, ip);
7059 }
7060
7061 static struct ftrace_probe_ops snapshot_probe_ops = {
7062 .func = ftrace_snapshot,
7063 .print = ftrace_snapshot_print,
7064 };
7065
7066 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7067 .func = ftrace_count_snapshot,
7068 .print = ftrace_snapshot_print,
7069 .init = ftrace_snapshot_init,
7070 .free = ftrace_snapshot_free,
7071 };
7072
7073 static int
7074 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7075 char *glob, char *cmd, char *param, int enable)
7076 {
7077 struct ftrace_probe_ops *ops;
7078 void *count = (void *)-1;
7079 char *number;
7080 int ret;
7081
7082 if (!tr)
7083 return -ENODEV;
7084
7085 /* hash funcs only work with set_ftrace_filter */
7086 if (!enable)
7087 return -EINVAL;
7088
7089 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7090
7091 if (glob[0] == '!')
7092 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7093
7094 if (!param)
7095 goto out_reg;
7096
7097 number = strsep(&param, ":");
7098
7099 if (!strlen(number))
7100 goto out_reg;
7101
7102 /*
7103 * We use the callback data field (which is a pointer)
7104 * as our counter.
7105 */
7106 ret = kstrtoul(number, 0, (unsigned long *)&count);
7107 if (ret)
7108 return ret;
7109
7110 out_reg:
7111 ret = alloc_snapshot(tr);
7112 if (ret < 0)
7113 goto out;
7114
7115 ret = register_ftrace_function_probe(glob, tr, ops, count);
7116
7117 out:
7118 return ret < 0 ? ret : 0;
7119 }
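/*
 * Example: the "snapshot" command registered below is driven through
 * set_ftrace_filter. Writing "<function>:snapshot" takes a snapshot
 * every time the function is hit, while "<function>:snapshot:N" uses
 * the count parsed above to limit it to N snapshots, and a leading '!'
 * unregisters the probe. A sketch (the function name is only an
 * example, the path assumes the usual tracefs mount point):
 *
 *	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "do_sys_open:snapshot:3", 22);
 *		close(fd);
 *	}
 */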
7120
7121 static struct ftrace_func_command ftrace_snapshot_cmd = {
7122 .name = "snapshot",
7123 .func = ftrace_trace_snapshot_callback,
7124 };
7125
7126 static __init int register_snapshot_cmd(void)
7127 {
7128 return register_ftrace_command(&ftrace_snapshot_cmd);
7129 }
7130 #else
7131 static inline __init int register_snapshot_cmd(void) { return 0; }
7132 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7133
7134 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7135 {
7136 if (WARN_ON(!tr->dir))
7137 return ERR_PTR(-ENODEV);
7138
7139 /* Top directory uses NULL as the parent */
7140 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7141 return NULL;
7142
7143 /* All sub buffers have a descriptor */
7144 return tr->dir;
7145 }
7146
7147 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7148 {
7149 struct dentry *d_tracer;
7150
7151 if (tr->percpu_dir)
7152 return tr->percpu_dir;
7153
7154 d_tracer = tracing_get_dentry(tr);
7155 if (IS_ERR(d_tracer))
7156 return NULL;
7157
7158 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7159
7160 WARN_ONCE(!tr->percpu_dir,
7161 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7162
7163 return tr->percpu_dir;
7164 }
7165
7166 static struct dentry *
7167 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7168 void *data, long cpu, const struct file_operations *fops)
7169 {
7170 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7171
7172 if (ret) /* See tracing_get_cpu() */
7173 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7174 return ret;
7175 }
7176
7177 static void
7178 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7179 {
7180 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7181 struct dentry *d_cpu;
7182 char cpu_dir[30]; /* 30 characters should be more than enough */
7183
7184 if (!d_percpu)
7185 return;
7186
7187 snprintf(cpu_dir, 30, "cpu%ld", cpu);
7188 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7189 if (!d_cpu) {
7190 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7191 return;
7192 }
7193
7194 /* per cpu trace_pipe */
7195 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7196 tr, cpu, &tracing_pipe_fops);
7197
7198 /* per cpu trace */
7199 trace_create_cpu_file("trace", 0644, d_cpu,
7200 tr, cpu, &tracing_fops);
7201
7202 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7203 tr, cpu, &tracing_buffers_fops);
7204
7205 trace_create_cpu_file("stats", 0444, d_cpu,
7206 tr, cpu, &tracing_stats_fops);
7207
7208 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7209 tr, cpu, &tracing_entries_fops);
7210
7211 #ifdef CONFIG_TRACER_SNAPSHOT
7212 trace_create_cpu_file("snapshot", 0644, d_cpu,
7213 tr, cpu, &snapshot_fops);
7214
7215 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7216 tr, cpu, &snapshot_raw_fops);
7217 #endif
7218 }
7219
7220 #ifdef CONFIG_FTRACE_SELFTEST
7221 /* Let selftest have access to static functions in this file */
7222 #include "trace_selftest.c"
7223 #endif
7224
7225 static ssize_t
7226 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7227 loff_t *ppos)
7228 {
7229 struct trace_option_dentry *topt = filp->private_data;
7230 char *buf;
7231
7232 if (topt->flags->val & topt->opt->bit)
7233 buf = "1\n";
7234 else
7235 buf = "0\n";
7236
7237 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7238 }
7239
7240 static ssize_t
7241 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7242 loff_t *ppos)
7243 {
7244 struct trace_option_dentry *topt = filp->private_data;
7245 unsigned long val;
7246 int ret;
7247
7248 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7249 if (ret)
7250 return ret;
7251
7252 if (val != 0 && val != 1)
7253 return -EINVAL;
7254
7255 if (!!(topt->flags->val & topt->opt->bit) != val) {
7256 mutex_lock(&trace_types_lock);
7257 ret = __set_tracer_option(topt->tr, topt->flags,
7258 topt->opt, !val);
7259 mutex_unlock(&trace_types_lock);
7260 if (ret)
7261 return ret;
7262 }
7263
7264 *ppos += cnt;
7265
7266 return cnt;
7267 }
7268
7269
7270 static const struct file_operations trace_options_fops = {
7271 .open = tracing_open_generic,
7272 .read = trace_options_read,
7273 .write = trace_options_write,
7274 .llseek = generic_file_llseek,
7275 };
7276
7277 /*
7278 * In order to pass in both the trace_array descriptor as well as the index
7279 * to the flag that the trace option file represents, the trace_array
7280 * has a character array of trace_flags_index[], which holds the index
7281 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7282 * The address of this character array is passed to the flag option file
7283 * read/write callbacks.
7284 *
7285 * In order to extract both the index and the trace_array descriptor,
7286 * get_tr_index() uses the following algorithm.
7287 *
7288 * idx = *ptr;
7289 *
7290  * The pointer points at an index entry whose value equals its own
7291  * position in the array (remember index[1] == 1).
7292  *
7293  * Then, to get the trace_array descriptor, we subtract that index
7294  * from ptr, which takes us back to the start of the index array.
7295 *
7296 * ptr - idx == &index[0]
7297 *
7298 * Then a simple container_of() from that pointer gets us to the
7299 * trace_array descriptor.
7300 */
7301 static void get_tr_index(void *data, struct trace_array **ptr,
7302 unsigned int *pindex)
7303 {
7304 *pindex = *(unsigned char *)data;
7305
7306 *ptr = container_of(data - *pindex, struct trace_array,
7307 trace_flags_index);
7308 }
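/*
 * A worked illustration of the arithmetic above, using a toy structure
 * (this is only a sketch, not code that is built here):
 *
 *	struct toy {
 *		unsigned char index[8];
 *	} t = { .index = { 0, 1, 2, 3, 4, 5, 6, 7 } };
 *
 *	void *data = &t.index[3];
 *	unsigned int idx = *(unsigned char *)data;
 *
 * Here idx == 3, so data - idx == &t.index[0], and
 * container_of(data - idx, struct toy, index) == &t, which is exactly
 * how get_tr_index() recovers the trace_array from trace_flags_index.
 */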
7309
7310 static ssize_t
7311 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7312 loff_t *ppos)
7313 {
7314 void *tr_index = filp->private_data;
7315 struct trace_array *tr;
7316 unsigned int index;
7317 char *buf;
7318
7319 get_tr_index(tr_index, &tr, &index);
7320
7321 if (tr->trace_flags & (1 << index))
7322 buf = "1\n";
7323 else
7324 buf = "0\n";
7325
7326 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7327 }
7328
7329 static ssize_t
7330 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7331 loff_t *ppos)
7332 {
7333 void *tr_index = filp->private_data;
7334 struct trace_array *tr;
7335 unsigned int index;
7336 unsigned long val;
7337 int ret;
7338
7339 get_tr_index(tr_index, &tr, &index);
7340
7341 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7342 if (ret)
7343 return ret;
7344
7345 if (val != 0 && val != 1)
7346 return -EINVAL;
7347
7348 mutex_lock(&trace_types_lock);
7349 ret = set_tracer_flag(tr, 1 << index, val);
7350 mutex_unlock(&trace_types_lock);
7351
7352 if (ret < 0)
7353 return ret;
7354
7355 *ppos += cnt;
7356
7357 return cnt;
7358 }
7359
7360 static const struct file_operations trace_options_core_fops = {
7361 .open = tracing_open_generic,
7362 .read = trace_options_core_read,
7363 .write = trace_options_core_write,
7364 .llseek = generic_file_llseek,
7365 };
7366
7367 struct dentry *trace_create_file(const char *name,
7368 umode_t mode,
7369 struct dentry *parent,
7370 void *data,
7371 const struct file_operations *fops)
7372 {
7373 struct dentry *ret;
7374
7375 ret = tracefs_create_file(name, mode, parent, data, fops);
7376 if (!ret)
7377 pr_warn("Could not create tracefs '%s' entry\n", name);
7378
7379 return ret;
7380 }
7381
7382
7383 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7384 {
7385 struct dentry *d_tracer;
7386
7387 if (tr->options)
7388 return tr->options;
7389
7390 d_tracer = tracing_get_dentry(tr);
7391 if (IS_ERR(d_tracer))
7392 return NULL;
7393
7394 tr->options = tracefs_create_dir("options", d_tracer);
7395 if (!tr->options) {
7396 pr_warn("Could not create tracefs directory 'options'\n");
7397 return NULL;
7398 }
7399
7400 return tr->options;
7401 }
7402
7403 static void
7404 create_trace_option_file(struct trace_array *tr,
7405 struct trace_option_dentry *topt,
7406 struct tracer_flags *flags,
7407 struct tracer_opt *opt)
7408 {
7409 struct dentry *t_options;
7410
7411 t_options = trace_options_init_dentry(tr);
7412 if (!t_options)
7413 return;
7414
7415 topt->flags = flags;
7416 topt->opt = opt;
7417 topt->tr = tr;
7418
7419 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7420 &trace_options_fops);
7421
7422 }
7423
7424 static void
7425 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7426 {
7427 struct trace_option_dentry *topts;
7428 struct trace_options *tr_topts;
7429 struct tracer_flags *flags;
7430 struct tracer_opt *opts;
7431 int cnt;
7432 int i;
7433
7434 if (!tracer)
7435 return;
7436
7437 flags = tracer->flags;
7438
7439 if (!flags || !flags->opts)
7440 return;
7441
7442 /*
7443 * If this is an instance, only create flags for tracers
7444 * the instance may have.
7445 */
7446 if (!trace_ok_for_array(tracer, tr))
7447 return;
7448
7449 for (i = 0; i < tr->nr_topts; i++) {
7450 		/* Make sure there are no duplicate flags. */
7451 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7452 return;
7453 }
7454
7455 opts = flags->opts;
7456
7457 for (cnt = 0; opts[cnt].name; cnt++)
7458 ;
7459
7460 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7461 if (!topts)
7462 return;
7463
7464 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7465 GFP_KERNEL);
7466 if (!tr_topts) {
7467 kfree(topts);
7468 return;
7469 }
7470
7471 tr->topts = tr_topts;
7472 tr->topts[tr->nr_topts].tracer = tracer;
7473 tr->topts[tr->nr_topts].topts = topts;
7474 tr->nr_topts++;
7475
7476 for (cnt = 0; opts[cnt].name; cnt++) {
7477 create_trace_option_file(tr, &topts[cnt], flags,
7478 &opts[cnt]);
7479 WARN_ONCE(topts[cnt].entry == NULL,
7480 "Failed to create trace option: %s",
7481 opts[cnt].name);
7482 }
7483 }
7484
7485 static struct dentry *
7486 create_trace_option_core_file(struct trace_array *tr,
7487 const char *option, long index)
7488 {
7489 struct dentry *t_options;
7490
7491 t_options = trace_options_init_dentry(tr);
7492 if (!t_options)
7493 return NULL;
7494
7495 return trace_create_file(option, 0644, t_options,
7496 (void *)&tr->trace_flags_index[index],
7497 &trace_options_core_fops);
7498 }
7499
7500 static void create_trace_options_dir(struct trace_array *tr)
7501 {
7502 struct dentry *t_options;
7503 bool top_level = tr == &global_trace;
7504 int i;
7505
7506 t_options = trace_options_init_dentry(tr);
7507 if (!t_options)
7508 return;
7509
7510 for (i = 0; trace_options[i]; i++) {
7511 if (top_level ||
7512 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7513 create_trace_option_core_file(tr, trace_options[i], i);
7514 }
7515 }
7516
7517 static ssize_t
7518 rb_simple_read(struct file *filp, char __user *ubuf,
7519 size_t cnt, loff_t *ppos)
7520 {
7521 struct trace_array *tr = filp->private_data;
7522 char buf[64];
7523 int r;
7524
7525 r = tracer_tracing_is_on(tr);
7526 r = sprintf(buf, "%d\n", r);
7527
7528 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7529 }
7530
7531 static ssize_t
7532 rb_simple_write(struct file *filp, const char __user *ubuf,
7533 size_t cnt, loff_t *ppos)
7534 {
7535 struct trace_array *tr = filp->private_data;
7536 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7537 unsigned long val;
7538 int ret;
7539
7540 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7541 if (ret)
7542 return ret;
7543
7544 if (buffer) {
7545 mutex_lock(&trace_types_lock);
7546 if (val) {
7547 tracer_tracing_on(tr);
7548 if (tr->current_trace->start)
7549 tr->current_trace->start(tr);
7550 } else {
7551 tracer_tracing_off(tr);
7552 if (tr->current_trace->stop)
7553 tr->current_trace->stop(tr);
7554 }
7555 mutex_unlock(&trace_types_lock);
7556 }
7557
7558 (*ppos)++;
7559
7560 return cnt;
7561 }
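/*
 * Example: tracing_on only gates recording into the ring buffer; the
 * rest of the tracing setup stays in place. A minimal sketch that
 * records just the interesting window from userspace, where
 * run_workload_of_interest() is a placeholder for whatever should be
 * captured (the path assumes the usual tracefs mount point):
 *
 *	int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		run_workload_of_interest();
 *		write(fd, "0", 1);
 *		close(fd);
 *	}
 */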
7562
7563 static const struct file_operations rb_simple_fops = {
7564 .open = tracing_open_generic_tr,
7565 .read = rb_simple_read,
7566 .write = rb_simple_write,
7567 .release = tracing_release_generic_tr,
7568 .llseek = default_llseek,
7569 };
7570
7571 struct dentry *trace_instance_dir;
7572
7573 static void
7574 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7575
7576 static int
7577 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7578 {
7579 enum ring_buffer_flags rb_flags;
7580
7581 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7582
7583 buf->tr = tr;
7584
7585 buf->buffer = ring_buffer_alloc(size, rb_flags);
7586 if (!buf->buffer)
7587 return -ENOMEM;
7588
7589 buf->data = alloc_percpu(struct trace_array_cpu);
7590 if (!buf->data) {
7591 ring_buffer_free(buf->buffer);
7592 return -ENOMEM;
7593 }
7594
7595 /* Allocate the first page for all buffers */
7596 set_buffer_entries(&tr->trace_buffer,
7597 ring_buffer_size(tr->trace_buffer.buffer, 0));
7598
7599 return 0;
7600 }
7601
7602 static int allocate_trace_buffers(struct trace_array *tr, int size)
7603 {
7604 int ret;
7605
7606 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7607 if (ret)
7608 return ret;
7609
7610 #ifdef CONFIG_TRACER_MAX_TRACE
7611 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7612 allocate_snapshot ? size : 1);
7613 if (WARN_ON(ret)) {
7614 ring_buffer_free(tr->trace_buffer.buffer);
7615 free_percpu(tr->trace_buffer.data);
7616 return -ENOMEM;
7617 }
7618 tr->allocated_snapshot = allocate_snapshot;
7619
7620 /*
7621 * Only the top level trace array gets its snapshot allocated
7622 * from the kernel command line.
7623 */
7624 allocate_snapshot = false;
7625 #endif
7626 return 0;
7627 }
7628
7629 static void free_trace_buffer(struct trace_buffer *buf)
7630 {
7631 if (buf->buffer) {
7632 ring_buffer_free(buf->buffer);
7633 buf->buffer = NULL;
7634 free_percpu(buf->data);
7635 buf->data = NULL;
7636 }
7637 }
7638
7639 static void free_trace_buffers(struct trace_array *tr)
7640 {
7641 if (!tr)
7642 return;
7643
7644 free_trace_buffer(&tr->trace_buffer);
7645
7646 #ifdef CONFIG_TRACER_MAX_TRACE
7647 free_trace_buffer(&tr->max_buffer);
7648 #endif
7649 }
7650
7651 static void init_trace_flags_index(struct trace_array *tr)
7652 {
7653 int i;
7654
7655 /* Used by the trace options files */
7656 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7657 tr->trace_flags_index[i] = i;
7658 }
7659
7660 static void __update_tracer_options(struct trace_array *tr)
7661 {
7662 struct tracer *t;
7663
7664 for (t = trace_types; t; t = t->next)
7665 add_tracer_options(tr, t);
7666 }
7667
7668 static void update_tracer_options(struct trace_array *tr)
7669 {
7670 mutex_lock(&trace_types_lock);
7671 __update_tracer_options(tr);
7672 mutex_unlock(&trace_types_lock);
7673 }
7674
7675 static int instance_mkdir(const char *name)
7676 {
7677 struct trace_array *tr;
7678 int ret;
7679
7680 mutex_lock(&trace_types_lock);
7681
7682 ret = -EEXIST;
7683 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7684 if (tr->name && strcmp(tr->name, name) == 0)
7685 goto out_unlock;
7686 }
7687
7688 ret = -ENOMEM;
7689 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7690 if (!tr)
7691 goto out_unlock;
7692
7693 tr->name = kstrdup(name, GFP_KERNEL);
7694 if (!tr->name)
7695 goto out_free_tr;
7696
7697 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7698 goto out_free_tr;
7699
7700 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7701
7702 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7703
7704 raw_spin_lock_init(&tr->start_lock);
7705
7706 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7707
7708 tr->current_trace = &nop_trace;
7709
7710 INIT_LIST_HEAD(&tr->systems);
7711 INIT_LIST_HEAD(&tr->events);
7712
7713 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7714 goto out_free_tr;
7715
7716 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7717 if (!tr->dir)
7718 goto out_free_tr;
7719
7720 ret = event_trace_add_tracer(tr->dir, tr);
7721 if (ret) {
7722 tracefs_remove_recursive(tr->dir);
7723 goto out_free_tr;
7724 }
7725
7726 ftrace_init_trace_array(tr);
7727
7728 init_tracer_tracefs(tr, tr->dir);
7729 init_trace_flags_index(tr);
7730 __update_tracer_options(tr);
7731
7732 list_add(&tr->list, &ftrace_trace_arrays);
7733
7734 mutex_unlock(&trace_types_lock);
7735
7736 return 0;
7737
7738 out_free_tr:
7739 free_trace_buffers(tr);
7740 free_cpumask_var(tr->tracing_cpumask);
7741 kfree(tr->name);
7742 kfree(tr);
7743
7744 out_unlock:
7745 mutex_unlock(&trace_types_lock);
7746
7747 return ret;
7748
7749 }
7750
7751 static int instance_rmdir(const char *name)
7752 {
7753 struct trace_array *tr;
7754 int found = 0;
7755 int ret;
7756 int i;
7757
7758 mutex_lock(&trace_types_lock);
7759
7760 ret = -ENODEV;
7761 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7762 if (tr->name && strcmp(tr->name, name) == 0) {
7763 found = 1;
7764 break;
7765 }
7766 }
7767 if (!found)
7768 goto out_unlock;
7769
7770 ret = -EBUSY;
7771 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7772 goto out_unlock;
7773
7774 list_del(&tr->list);
7775
7776 /* Disable all the flags that were enabled coming in */
7777 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7778 if ((1 << i) & ZEROED_TRACE_FLAGS)
7779 set_tracer_flag(tr, 1 << i, 0);
7780 }
7781
7782 tracing_set_nop(tr);
7783 clear_ftrace_function_probes(tr);
7784 event_trace_del_tracer(tr);
7785 ftrace_clear_pids(tr);
7786 ftrace_destroy_function_files(tr);
7787 tracefs_remove_recursive(tr->dir);
7788 free_trace_buffers(tr);
7789
7790 for (i = 0; i < tr->nr_topts; i++) {
7791 kfree(tr->topts[i].topts);
7792 }
7793 kfree(tr->topts);
7794
7795 free_cpumask_var(tr->tracing_cpumask);
7796 kfree(tr->name);
7797 kfree(tr);
7798
7799 ret = 0;
7800
7801 out_unlock:
7802 mutex_unlock(&trace_types_lock);
7803
7804 return ret;
7805 }
7806
7807 static __init void create_trace_instances(struct dentry *d_tracer)
7808 {
7809 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7810 instance_mkdir,
7811 instance_rmdir);
7812 if (WARN_ON(!trace_instance_dir))
7813 return;
7814 }
7815
7816 static void
7817 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7818 {
7819 int cpu;
7820
7821 trace_create_file("available_tracers", 0444, d_tracer,
7822 tr, &show_traces_fops);
7823
7824 trace_create_file("current_tracer", 0644, d_tracer,
7825 tr, &set_tracer_fops);
7826
7827 trace_create_file("tracing_cpumask", 0644, d_tracer,
7828 tr, &tracing_cpumask_fops);
7829
7830 trace_create_file("trace_options", 0644, d_tracer,
7831 tr, &tracing_iter_fops);
7832
7833 trace_create_file("trace", 0644, d_tracer,
7834 tr, &tracing_fops);
7835
7836 trace_create_file("trace_pipe", 0444, d_tracer,
7837 tr, &tracing_pipe_fops);
7838
7839 trace_create_file("buffer_size_kb", 0644, d_tracer,
7840 tr, &tracing_entries_fops);
7841
7842 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7843 tr, &tracing_total_entries_fops);
7844
7845 trace_create_file("free_buffer", 0200, d_tracer,
7846 tr, &tracing_free_buffer_fops);
7847
7848 trace_create_file("trace_marker", 0220, d_tracer,
7849 tr, &tracing_mark_fops);
7850
7851 trace_create_file("trace_marker_raw", 0220, d_tracer,
7852 tr, &tracing_mark_raw_fops);
7853
7854 trace_create_file("trace_clock", 0644, d_tracer, tr,
7855 &trace_clock_fops);
7856
7857 trace_create_file("tracing_on", 0644, d_tracer,
7858 tr, &rb_simple_fops);
7859
7860 create_trace_options_dir(tr);
7861
7862 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7863 trace_create_file("tracing_max_latency", 0644, d_tracer,
7864 &tr->max_latency, &tracing_max_lat_fops);
7865 #endif
7866
7867 if (ftrace_create_function_files(tr, d_tracer))
7868 WARN(1, "Could not allocate function filter files");
7869
7870 #ifdef CONFIG_TRACER_SNAPSHOT
7871 trace_create_file("snapshot", 0644, d_tracer,
7872 tr, &snapshot_fops);
7873 #endif
7874
7875 for_each_tracing_cpu(cpu)
7876 tracing_init_tracefs_percpu(tr, cpu);
7877
7878 ftrace_init_tracefs(tr, d_tracer);
7879 }
7880
7881 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7882 {
7883 struct vfsmount *mnt;
7884 struct file_system_type *type;
7885
7886 /*
7887 * To maintain backward compatibility for tools that mount
7888 * debugfs to get to the tracing facility, tracefs is automatically
7889 * mounted to the debugfs/tracing directory.
7890 */
7891 type = get_fs_type("tracefs");
7892 if (!type)
7893 return NULL;
7894 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7895 put_filesystem(type);
7896 if (IS_ERR(mnt))
7897 return NULL;
7898 mntget(mnt);
7899
7900 return mnt;
7901 }
7902
7903 /**
7904 * tracing_init_dentry - initialize top level trace array
7905 *
7906 * This is called when creating files or directories in the tracing
7907  * directory. It is called via fs_initcall() by the boot-up code and
7908  * expects to return the dentry of the top level tracing directory.
7909 */
7910 struct dentry *tracing_init_dentry(void)
7911 {
7912 struct trace_array *tr = &global_trace;
7913
7914 /* The top level trace array uses NULL as parent */
7915 if (tr->dir)
7916 return NULL;
7917
7918 if (WARN_ON(!tracefs_initialized()) ||
7919 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7920 WARN_ON(!debugfs_initialized())))
7921 return ERR_PTR(-ENODEV);
7922
7923 /*
7924 * As there may still be users that expect the tracing
7925 * files to exist in debugfs/tracing, we must automount
7926 * the tracefs file system there, so older tools still
7927 	 * work with the newer kernel.
7928 */
7929 tr->dir = debugfs_create_automount("tracing", NULL,
7930 trace_automount, NULL);
7931 if (!tr->dir) {
7932 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7933 return ERR_PTR(-ENOMEM);
7934 }
7935
7936 return NULL;
7937 }
7938
7939 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7940 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7941
7942 static void __init trace_eval_init(void)
7943 {
7944 int len;
7945
7946 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7947 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7948 }
7949
7950 #ifdef CONFIG_MODULES
7951 static void trace_module_add_evals(struct module *mod)
7952 {
7953 if (!mod->num_trace_evals)
7954 return;
7955
7956 /*
7957 	 * Modules with bad taint do not have events created; do not
7958 	 * bother with their eval maps (enums) either.
7959 */
7960 if (trace_module_has_bad_taint(mod))
7961 return;
7962
7963 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7964 }
7965
7966 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7967 static void trace_module_remove_evals(struct module *mod)
7968 {
7969 union trace_eval_map_item *map;
7970 union trace_eval_map_item **last = &trace_eval_maps;
7971
7972 if (!mod->num_trace_evals)
7973 return;
7974
7975 mutex_lock(&trace_eval_mutex);
7976
7977 map = trace_eval_maps;
7978
7979 while (map) {
7980 if (map->head.mod == mod)
7981 break;
7982 map = trace_eval_jmp_to_tail(map);
7983 last = &map->tail.next;
7984 map = map->tail.next;
7985 }
7986 if (!map)
7987 goto out;
7988
7989 *last = trace_eval_jmp_to_tail(map)->tail.next;
7990 kfree(map);
7991 out:
7992 mutex_unlock(&trace_eval_mutex);
7993 }
7994 #else
7995 static inline void trace_module_remove_evals(struct module *mod) { }
7996 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7997
7998 static int trace_module_notify(struct notifier_block *self,
7999 unsigned long val, void *data)
8000 {
8001 struct module *mod = data;
8002
8003 switch (val) {
8004 case MODULE_STATE_COMING:
8005 trace_module_add_evals(mod);
8006 break;
8007 case MODULE_STATE_GOING:
8008 trace_module_remove_evals(mod);
8009 break;
8010 }
8011
8012 return 0;
8013 }
8014
8015 static struct notifier_block trace_module_nb = {
8016 .notifier_call = trace_module_notify,
8017 .priority = 0,
8018 };
8019 #endif /* CONFIG_MODULES */
8020
8021 static __init int tracer_init_tracefs(void)
8022 {
8023 struct dentry *d_tracer;
8024
8025 trace_access_lock_init();
8026
8027 d_tracer = tracing_init_dentry();
8028 if (IS_ERR(d_tracer))
8029 return 0;
8030
8031 init_tracer_tracefs(&global_trace, d_tracer);
8032 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8033
8034 trace_create_file("tracing_thresh", 0644, d_tracer,
8035 &global_trace, &tracing_thresh_fops);
8036
8037 trace_create_file("README", 0444, d_tracer,
8038 NULL, &tracing_readme_fops);
8039
8040 trace_create_file("saved_cmdlines", 0444, d_tracer,
8041 NULL, &tracing_saved_cmdlines_fops);
8042
8043 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8044 NULL, &tracing_saved_cmdlines_size_fops);
8045
8046 trace_create_file("saved_tgids", 0444, d_tracer,
8047 NULL, &tracing_saved_tgids_fops);
8048
8049 trace_eval_init();
8050
8051 trace_create_eval_file(d_tracer);
8052
8053 #ifdef CONFIG_MODULES
8054 register_module_notifier(&trace_module_nb);
8055 #endif
8056
8057 #ifdef CONFIG_DYNAMIC_FTRACE
8058 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8059 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8060 #endif
8061
8062 create_trace_instances(d_tracer);
8063
8064 update_tracer_options(&global_trace);
8065
8066 return 0;
8067 }
8068
8069 static int trace_panic_handler(struct notifier_block *this,
8070 unsigned long event, void *unused)
8071 {
8072 if (ftrace_dump_on_oops)
8073 ftrace_dump(ftrace_dump_on_oops);
8074 return NOTIFY_OK;
8075 }
8076
8077 static struct notifier_block trace_panic_notifier = {
8078 .notifier_call = trace_panic_handler,
8079 .next = NULL,
8080 .priority = 150 /* priority: INT_MAX >= x >= 0 */
8081 };
8082
8083 static int trace_die_handler(struct notifier_block *self,
8084 unsigned long val,
8085 void *data)
8086 {
8087 switch (val) {
8088 case DIE_OOPS:
8089 if (ftrace_dump_on_oops)
8090 ftrace_dump(ftrace_dump_on_oops);
8091 break;
8092 default:
8093 break;
8094 }
8095 return NOTIFY_OK;
8096 }
8097
8098 static struct notifier_block trace_die_notifier = {
8099 .notifier_call = trace_die_handler,
8100 .priority = 200
8101 };
8102
8103 /*
8104  * printk is set to a max of 1024; we really don't need it that big.
8105 * Nothing should be printing 1000 characters anyway.
8106 */
8107 #define TRACE_MAX_PRINT 1000
8108
8109 /*
8110 * Define here KERN_TRACE so that we have one place to modify
8111 * it if we decide to change what log level the ftrace dump
8112 * should be at.
8113 */
8114 #define KERN_TRACE KERN_EMERG
8115
8116 void
8117 trace_printk_seq(struct trace_seq *s)
8118 {
8119 /* Probably should print a warning here. */
8120 if (s->seq.len >= TRACE_MAX_PRINT)
8121 s->seq.len = TRACE_MAX_PRINT;
8122
8123 /*
8124 * More paranoid code. Although the buffer size is set to
8125 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8126 * an extra layer of protection.
8127 */
8128 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8129 s->seq.len = s->seq.size - 1;
8130
8131 	/* Should be zero-terminated, but we are paranoid. */
8132 s->buffer[s->seq.len] = 0;
8133
8134 printk(KERN_TRACE "%s", s->buffer);
8135
8136 trace_seq_init(s);
8137 }
8138
8139 void trace_init_global_iter(struct trace_iterator *iter)
8140 {
8141 iter->tr = &global_trace;
8142 iter->trace = iter->tr->current_trace;
8143 iter->cpu_file = RING_BUFFER_ALL_CPUS;
8144 iter->trace_buffer = &global_trace.trace_buffer;
8145
8146 if (iter->trace && iter->trace->open)
8147 iter->trace->open(iter);
8148
8149 /* Annotate start of buffers if we had overruns */
8150 if (ring_buffer_overruns(iter->trace_buffer->buffer))
8151 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8152
8153 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8154 if (trace_clocks[iter->tr->clock_id].in_ns)
8155 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8156 }
8157
8158 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8159 {
8160 /* use static because iter can be a bit big for the stack */
8161 static struct trace_iterator iter;
8162 static atomic_t dump_running;
8163 struct trace_array *tr = &global_trace;
8164 unsigned int old_userobj;
8165 unsigned long flags;
8166 int cnt = 0, cpu;
8167
8168 /* Only allow one dump user at a time. */
8169 if (atomic_inc_return(&dump_running) != 1) {
8170 atomic_dec(&dump_running);
8171 return;
8172 }
8173
8174 /*
8175 * Always turn off tracing when we dump.
8176 * We don't need to show trace output of what happens
8177 * between multiple crashes.
8178 *
8179 * If the user does a sysrq-z, then they can re-enable
8180 * tracing with echo 1 > tracing_on.
8181 */
8182 tracing_off();
8183
8184 local_irq_save(flags);
8185
8186 /* Simulate the iterator */
8187 trace_init_global_iter(&iter);
8188
8189 for_each_tracing_cpu(cpu) {
8190 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8191 }
8192
8193 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8194
8195 /* don't look at user memory in panic mode */
8196 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8197
8198 switch (oops_dump_mode) {
8199 case DUMP_ALL:
8200 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8201 break;
8202 case DUMP_ORIG:
8203 iter.cpu_file = raw_smp_processor_id();
8204 break;
8205 case DUMP_NONE:
8206 goto out_enable;
8207 default:
8208 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8209 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8210 }
8211
8212 printk(KERN_TRACE "Dumping ftrace buffer:\n");
8213
8214 /* Did function tracer already get disabled? */
8215 if (ftrace_is_dead()) {
8216 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8217 printk("# MAY BE MISSING FUNCTION EVENTS\n");
8218 }
8219
8220 /*
8221 * We need to stop all tracing on all CPUs to read
8222 * the next buffer. This is a bit expensive, but is
8223 * not done often. We fill everything we can read,
8224 * and then release the locks again.
8225 */
8226
8227 while (!trace_empty(&iter)) {
8228
8229 if (!cnt)
8230 printk(KERN_TRACE "---------------------------------\n");
8231
8232 cnt++;
8233
8234 /* reset all but tr, trace, and overruns */
8235 memset(&iter.seq, 0,
8236 sizeof(struct trace_iterator) -
8237 offsetof(struct trace_iterator, seq));
8238 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8239 iter.pos = -1;
8240
8241 if (trace_find_next_entry_inc(&iter) != NULL) {
8242 int ret;
8243
8244 ret = print_trace_line(&iter);
8245 if (ret != TRACE_TYPE_NO_CONSUME)
8246 trace_consume(&iter);
8247 }
8248 touch_nmi_watchdog();
8249
8250 trace_printk_seq(&iter.seq);
8251 }
8252
8253 if (!cnt)
8254 printk(KERN_TRACE " (ftrace buffer empty)\n");
8255 else
8256 printk(KERN_TRACE "---------------------------------\n");
8257
8258 out_enable:
8259 tr->trace_flags |= old_userobj;
8260
8261 for_each_tracing_cpu(cpu) {
8262 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8263 }
8264 atomic_dec(&dump_running);
8265 local_irq_restore(flags);
8266 }
8267 EXPORT_SYMBOL_GPL(ftrace_dump);
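
/*
 * ftrace_dump() is exported, so a module's own error path can force the
 * same console dump that the panic/die notifiers above trigger.  The
 * oops path chooses the mode from the ftrace_dump_on_oops= kernel
 * parameter; a direct caller picks it explicitly.  A minimal sketch,
 * with example_fail() as a hypothetical name, not built here:
 */
#if 0
#include <linux/kernel.h>	/* ftrace_dump(), enum ftrace_dump_mode */

static void example_fail(void)
{
	/* Dump every CPU's ring buffer; DUMP_ORIG would dump only this CPU. */
	ftrace_dump(DUMP_ALL);
}
#endif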
8268
8269 __init static int tracer_alloc_buffers(void)
8270 {
8271 int ring_buf_size;
8272 int ret = -ENOMEM;
8273
8274 /*
8275 * Make sure we don't accidentally add more trace options
8276 * than we have bits for.
8277 */
8278 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8279
8280 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8281 goto out;
8282
8283 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8284 goto out_free_buffer_mask;
8285
8286 /* Only allocate trace_printk buffers if a trace_printk() call site exists */
8287 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8288 /* Must be called before global_trace.buffer is allocated */
8289 trace_printk_init_buffers();
8290
8291 /* To save memory, keep the ring buffer size to its minimum */
8292 if (ring_buffer_expanded)
8293 ring_buf_size = trace_buf_size;
8294 else
8295 ring_buf_size = 1;
8296
8297 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8298 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8299
8300 raw_spin_lock_init(&global_trace.start_lock);
8301
8302 /*
8303 * The prepare callback allocates some memory for the ring buffer. We
8304 * don't free the buffer if the CPU goes down. If we were to free
8305 * the buffer, then the user would lose any trace that was in the
8306 * buffer. The memory will be removed once the "instance" is removed.
8307 */
8308 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8309 "trace/RB:preapre", trace_rb_cpu_prepare,
8310 NULL);
8311 if (ret < 0)
8312 goto out_free_cpumask;
8313 /* Used for event triggers */
8314 ret = -ENOMEM;
8315 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8316 if (!temp_buffer)
8317 goto out_rm_hp_state;
8318
8319 if (trace_create_savedcmd() < 0)
8320 goto out_free_temp_buffer;
8321
8322 /* TODO: make the number of buffers hot pluggable with CPUs */
8323 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8324 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8325 WARN_ON(1);
8326 goto out_free_savedcmd;
8327 }
8328
8329 if (global_trace.buffer_disabled)
8330 tracing_off();
8331
8332 if (trace_boot_clock) {
8333 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8334 if (ret < 0)
8335 pr_warn("Trace clock %s not defined, going back to default\n",
8336 trace_boot_clock);
8337 }
8338
8339 /*
8340 * register_tracer() might reference current_trace, so it
8341 * needs to be set before we register anything. This is
8342 * just a bootstrap of current_trace anyway.
8343 */
8344 global_trace.current_trace = &nop_trace;
8345
8346 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8347
8348 ftrace_init_global_array_ops(&global_trace);
8349
8350 init_trace_flags_index(&global_trace);
8351
8352 register_tracer(&nop_trace);
8353
8354 /* Function tracing may start here (via kernel command line) */
8355 init_function_trace();
8356
8357 /* All seems OK, enable tracing */
8358 tracing_disabled = 0;
8359
8360 atomic_notifier_chain_register(&panic_notifier_list,
8361 &trace_panic_notifier);
8362
8363 register_die_notifier(&trace_die_notifier);
8364
8365 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8366
8367 INIT_LIST_HEAD(&global_trace.systems);
8368 INIT_LIST_HEAD(&global_trace.events);
8369 list_add(&global_trace.list, &ftrace_trace_arrays);
8370
8371 apply_trace_boot_options();
8372
8373 register_snapshot_cmd();
8374
8375 return 0;
8376
8377 out_free_savedcmd:
8378 free_saved_cmdlines_buffer(savedcmd);
8379 out_free_temp_buffer:
8380 ring_buffer_free(temp_buffer);
8381 out_rm_hp_state:
8382 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8383 out_free_cpumask:
8384 free_cpumask_var(global_trace.tracing_cpumask);
8385 out_free_buffer_mask:
8386 free_cpumask_var(tracing_buffer_mask);
8387 out:
8388 return ret;
8389 }
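
/*
 * CPUHP_TRACE_RB_PREPARE, set up in tracer_alloc_buffers() above, is a
 * multi-instance hotplug state: cpuhp_setup_state_multi() only installs
 * the callback, and each ring buffer later attaches itself as an
 * instance (see ring_buffer_alloc() in ring_buffer.c).  A minimal
 * sketch of the attach side, with example_node as a hypothetical
 * stand-in for the buffer's hlist_node, not built here:
 */
#if 0
#include <linux/cpuhotplug.h>

static struct hlist_node example_node;

static int example_attach(void)
{
	/*
	 * From now on trace_rb_cpu_prepare() is invoked for example_node
	 * whenever a CPU is brought up.
	 */
	return cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &example_node);
}
#endif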
8390
8391 void __init early_trace_init(void)
8392 {
8393 if (tracepoint_printk) {
8394 tracepoint_print_iter =
8395 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8396 if (WARN_ON(!tracepoint_print_iter))
8397 tracepoint_printk = 0;
8398 else
8399 static_key_enable(&tracepoint_printk_key.key);
8400 }
8401 tracer_alloc_buffers();
8402 }
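
/*
 * tracepoint_printk_key is a static key that early_trace_init() enables
 * only when tracepoint printing was requested at boot, so the check
 * costs a patched no-op branch while it is off.  A minimal sketch of
 * the usual consumer-side pattern for such a key, with example_hook()
 * as a hypothetical name, not built here:
 */
#if 0
static void example_hook(void)
{
	/* Compiles to a no-op branch until static_key_enable() runs. */
	if (static_branch_unlikely(&tracepoint_printk_key))
		pr_info("tracepoint printk path active\n");
}
#endif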
8403
8404 void __init trace_init(void)
8405 {
8406 trace_event_init();
8407 }
8408
8409 __init static int clear_boot_tracer(void)
8410 {
8411 /*
8412 * The default bootup tracer name lives in an __init section that is
8413 * freed after boot. This function runs as a late initcall; if we did not
8414 * find the boot tracer, then clear it out, to prevent
8415 * later registration from accessing the buffer that is
8416 * about to be freed.
8417 */
8418 if (!default_bootup_tracer)
8419 return 0;
8420
8421 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8422 default_bootup_tracer);
8423 default_bootup_tracer = NULL;
8424
8425 return 0;
8426 }
8427
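/*
 * tracer_init_tracefs() (defined earlier in this file) runs at
 * fs_initcall time, once the filesystem infrastructure needed by
 * tracefs is available, while clear_boot_tracer() runs as a
 * late_initcall_sync, after every other late initcall has had its
 * chance to register the boot-requested tracer.
 */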
8428 fs_initcall(tracer_init_tracefs);
8429 late_initcall_sync(clear_boot_tracer);