1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52 * On boot up, the ring buffer is set to the minimum size, so that
53 * we do not waste memory on systems that are not using tracing.
54 */
55 bool ring_buffer_expanded;
56
57 /*
58 * We need to change this state when a selftest is running.
59 * A selftest will look into the ring buffer to count the
60 * entries inserted during the selftest, although concurrent
61 * insertions into the ring buffer, such as trace_printk(), could occur
62 * at the same time, giving false positive or negative results.
63 */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67 * If a tracer is running, we do not want to run SELFTEST.
68 */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 return 0;
85 }
86
87 /*
88 * To prevent the comm cache from being overwritten when no
89 * tracing is active, only save the comm when a trace event
90 * occurred.
91 */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95 * Kill all tracing for good (never come back).
96 * It is initialized to 1 but will turn to zero if the initialization
97 * of the tracer is successful. But that is the only place that sets
98 * this back to zero.
99 */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly tracing_buffer_mask;
103
104 /*
105 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106 *
107 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108 * is set, then ftrace_dump is called. This will output the contents
109 * of the ftrace buffers to the console. This is very useful for
110 * capturing traces that lead to crashes and outputting them to a
111 * serial console.
112 *
113 * It is off by default, but you can enable it either by specifying
114 * "ftrace_dump_on_oops" on the kernel command line, or by setting
115 * /proc/sys/kernel/ftrace_dump_on_oops
116 * Set 1 if you want to dump buffers of all CPUs
117 * Set 2 if you want to dump the buffer of the CPU that triggered oops
118 */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 struct module *mod;
129 unsigned long length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135 /*
136 * "end" is first and points to NULL as it must be different
137 * from "mod" or "eval_string"
138 */
139 union trace_eval_map_item *next;
140 const char *end; /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146 * The trace_eval_maps are saved in an array with two extra elements,
147 * one at the beginning, and one at the end. The beginning item contains
148 * the count of the saved maps (head.length), and the module they
149 * belong to if not built in (head.mod). The ending item contains a
150 * pointer to the next array of saved eval_map items.
151 */
152 union trace_eval_map_item {
153 struct trace_eval_map map;
154 struct trace_eval_map_head head;
155 struct trace_eval_map_tail tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
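
/*
 * Illustrative sketch (not part of the original source): a saved array of
 * N eval maps is laid out roughly as
 *
 *	[ head (mod, length = N) ][ map 0 ] ... [ map N-1 ][ tail (next, end) ]
 *
 * where tail.next chains to the head item of the next saved array, if any.
 */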
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE 100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 default_bootup_tracer = bootup_tracer_buf;
173 /* We are using ftrace early, expand it */
174 ring_buffer_expanded = true;
175 return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 if (*str++ != '=' || !*str) {
182 ftrace_dump_on_oops = DUMP_ALL;
183 return 1;
184 }
185
186 if (!strcmp("orig_cpu", str)) {
187 ftrace_dump_on_oops = DUMP_ORIG;
188 return 1;
189 }
190
191 return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 __disable_trace_on_warning = 1;
199 return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 allocate_snapshot = true;
206 /* We also need the main ring buffer expanded */
207 ring_buffer_expanded = true;
208 return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 trace_boot_clock = trace_boot_clock_buf;
229 return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 tracepoint_printk = 1;
237 return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 nsec += 500;
244 do_div(nsec, 1000);
245 return nsec;
246 }
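
/*
 * For example (illustration only): ns2usecs(1500) computes (1500 + 500) / 1000
 * == 2, i.e. the conversion rounds to the nearest microsecond rather than
 * truncating.
 */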
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS \
250 (FUNCTION_DEFAULT_FLAGS | \
251 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
252 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
253 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
254 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
258 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265 * The global_trace is the descriptor that holds the top-level tracing
266 * buffers for the live tracing.
267 */
268 static struct trace_array global_trace = {
269 .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 struct trace_array *tr;
277 int ret = -ENODEV;
278
279 mutex_lock(&trace_types_lock);
280 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 if (tr == this_tr) {
282 tr->ref++;
283 ret = 0;
284 break;
285 }
286 }
287 mutex_unlock(&trace_types_lock);
288
289 return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 WARN_ON(!this_tr->ref);
295 this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 mutex_lock(&trace_types_lock);
301 __trace_array_put(this_tr);
302 mutex_unlock(&trace_types_lock);
303 }
304
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 struct ring_buffer *buffer,
307 struct ring_buffer_event *event)
308 {
309 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 !filter_match_preds(call->filter, rec)) {
311 __trace_event_discard_commit(buffer, event);
312 return 1;
313 }
314
315 return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 vfree(pid_list->pids);
321 kfree(pid_list);
322 }
323
324 /**
325 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326 * @filtered_pids: The list of pids to check
327 * @search_pid: The PID to find in @filtered_pids
328 *
329 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330 */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 /*
335 * If pid_max changed after filtered_pids was created, we
336 * by default ignore all pids greater than the previous pid_max.
337 */
338 if (search_pid >= filtered_pids->pid_max)
339 return false;
340
341 return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345 * trace_ignore_this_task - should a task be ignored for tracing
346 * @filtered_pids: The list of pids to check
347 * @task: The task that should be ignored if not filtered
348 *
349 * Checks if @task should be traced or not from @filtered_pids.
350 * Returns true if @task should *NOT* be traced.
351 * Returns false if @task should be traced.
352 */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 /*
357 * Return false, because if filtered_pids does not exist,
358 * all pids are good to trace.
359 */
360 if (!filtered_pids)
361 return false;
362
363 return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367 * trace_filter_add_remove_task - Add or remove a task from a pid_list
368 * @pid_list: The list to modify
369 * @self: The current task for fork or NULL for exit
370 * @task: The task to add or remove
371 *
372 * If adding a task, if @self is defined, the task is only added if @self
373 * is also included in @pid_list. This happens on fork and tasks should
374 * only be added when the parent is listed. If @self is NULL, then the
375 * @task pid will be removed from the list, which would happen on exit
376 * of a task.
377 */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 struct task_struct *self,
380 struct task_struct *task)
381 {
382 if (!pid_list)
383 return;
384
385 /* For forks, we only add if the forking task is listed */
386 if (self) {
387 if (!trace_find_filtered_pid(pid_list, self->pid))
388 return;
389 }
390
391 /* Sorry, but we don't support pid_max changing after setting */
392 if (task->pid >= pid_list->pid_max)
393 return;
394
395 /* "self" is set for forks, and NULL for exits */
396 if (self)
397 set_bit(task->pid, pid_list->pids);
398 else
399 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404 * @pid_list: The pid list to show
405 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406 * @pos: The position of the file
407 *
408 * This is used by the seq_file "next" operation to iterate the pids
409 * listed in a trace_pid_list structure.
410 *
411 * Returns the pid+1 as we want to display pid of zero, but NULL would
412 * stop the iteration.
413 */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 unsigned long pid = (unsigned long)v;
417
418 (*pos)++;
419
420 /* pid already is +1 of the actual previous bit */
421 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423 /* Return pid + 1 to allow zero to be represented */
424 if (pid < pid_list->pid_max)
425 return (void *)(pid + 1);
426
427 return NULL;
428 }
429
430 /**
431 * trace_pid_start - Used for seq_file to start reading pid lists
432 * @pid_list: The pid list to show
433 * @pos: The position of the file
434 *
435 * This is used by seq_file "start" operation to start the iteration
436 * of listing pids.
437 *
438 * Returns the pid+1 as we want to display pid of zero, but NULL would
439 * stop the iteration.
440 */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 unsigned long pid;
444 loff_t l = 0;
445
446 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 if (pid >= pid_list->pid_max)
448 return NULL;
449
450 /* Return pid + 1 so that zero can be the exit value */
451 for (pid++; pid && l < *pos;
452 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 ;
454 return (void *)pid;
455 }
456
457 /**
458 * trace_pid_show - show the current pid in seq_file processing
459 * @m: The seq_file structure to write into
460 * @v: A void pointer of the pid (+1) value to display
461 *
462 * Can be directly used by seq_file operations to display the current
463 * pid value.
464 */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 unsigned long pid = (unsigned long)v - 1;
468
469 seq_printf(m, "%lu\n", pid);
470 return 0;
471 }
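
/*
 * Illustrative sketch (assumed wiring, not code from this file): the pid
 * seq_file helpers above are meant to back a seq_operations table, roughly:
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= example_pid_start,	(wraps trace_pid_start)
 *		.next	= example_pid_next,	(wraps trace_pid_next)
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * The example_* wrappers are hypothetical; they would look up the
 * trace_pid_list under the appropriate locking and delegate to the
 * helpers above.
 */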
472
473 /* 127 chars plus the terminating nul: 128 should be much more than enough */
474 #define PID_BUF_SIZE 127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 struct trace_pid_list **new_pid_list,
478 const char __user *ubuf, size_t cnt)
479 {
480 struct trace_pid_list *pid_list;
481 struct trace_parser parser;
482 unsigned long val;
483 int nr_pids = 0;
484 ssize_t read = 0;
485 ssize_t ret = 0;
486 loff_t pos;
487 pid_t pid;
488
489 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 return -ENOMEM;
491
492 /*
493 * Always create a new array when the user writes new pids. The
494 * write is an all-or-nothing operation: if any part of the
495 * write fails, then the current list is not modified and the
496 * new array is freed.
497 */
498 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 if (!pid_list) {
500 trace_parser_put(&parser);
501 return -ENOMEM;
502 }
503
504 pid_list->pid_max = READ_ONCE(pid_max);
505
506 /* Only truncating will shrink pid_max */
507 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
508 pid_list->pid_max = filtered_pids->pid_max;
509
510 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
511 if (!pid_list->pids) {
512 trace_parser_put(&parser);
513 kfree(pid_list);
514 return -ENOMEM;
515 }
516
517 if (filtered_pids) {
518 /* copy the current bits to the new max */
519 for_each_set_bit(pid, filtered_pids->pids,
520 filtered_pids->pid_max) {
521 set_bit(pid, pid_list->pids);
522 nr_pids++;
523 }
524 }
525
526 while (cnt > 0) {
527
528 pos = 0;
529
530 ret = trace_get_user(&parser, ubuf, cnt, &pos);
531 if (ret < 0 || !trace_parser_loaded(&parser))
532 break;
533
534 read += ret;
535 ubuf += ret;
536 cnt -= ret;
537
538 ret = -EINVAL;
539 if (kstrtoul(parser.buffer, 0, &val))
540 break;
541 if (val >= pid_list->pid_max)
542 break;
543
544 pid = (pid_t)val;
545
546 set_bit(pid, pid_list->pids);
547 nr_pids++;
548
549 trace_parser_clear(&parser);
550 ret = 0;
551 }
552 trace_parser_put(&parser);
553
554 if (ret < 0) {
555 trace_free_pid_list(pid_list);
556 return ret;
557 }
558
559 if (!nr_pids) {
560 /* Cleared the list of pids */
561 trace_free_pid_list(pid_list);
562 read = ret;
563 pid_list = NULL;
564 }
565
566 *new_pid_list = pid_list;
567
568 return read;
569 }
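
/*
 * Example behaviour (illustration only, derived from the code above):
 * writing "123 456" builds a fresh bitmap containing pids 123 and 456
 * (plus any pids carried over from @filtered_pids) and returns the number
 * of bytes consumed. If any token fails to parse, the new list is freed
 * and *new_pid_list is left untouched, keeping the old filter intact.
 */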
570
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573 u64 ts;
574
575 /* Early boot up does not have a buffer yet */
576 if (!buf->buffer)
577 return trace_clock_local();
578
579 ts = ring_buffer_time_stamp(buf->buffer, cpu);
580 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581
582 return ts;
583 }
584
585 u64 ftrace_now(int cpu)
586 {
587 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589
590 /**
591 * tracing_is_enabled - Show if global_trace has been disabled
592 *
593 * Shows if the global trace has been enabled or not. It uses the
594 * mirror flag "buffer_disabled" to be used in fast paths such as for
595 * the irqsoff tracer. But it may be inaccurate due to races. If you
596 * need to know the accurate state, use tracing_is_on() which is a little
597 * slower, but accurate.
598 */
599 int tracing_is_enabled(void)
600 {
601 /*
602 * For quick access (irqsoff uses this in fast path), just
603 * return the mirror variable of the state of the ring buffer.
604 * It's a little racy, but we don't really care.
605 */
606 smp_rmb();
607 return !global_trace.buffer_disabled;
608 }
609
610 /*
611 * trace_buf_size is the size in bytes that is allocated
612 * for a buffer. Note, the number of bytes is always rounded
613 * to page size.
614 *
615 * This number is purposely set to a low value of 16384 so that,
616 * if a dump on oops happens, there is not too much output to
617 * wait through. In any case, this is configurable at both boot
618 * time and run time.
619 */
620 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
621
622 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer *trace_types __read_mostly;
626
627 /*
628 * trace_types_lock is used to protect the trace_types list.
629 */
630 DEFINE_MUTEX(trace_types_lock);
631
632 /*
633 * serialize access to the ring buffer
634 *
635 * The ring buffer serializes readers, but that is only low level protection.
636 * The validity of the events (returned by ring_buffer_peek() etc.)
637 * is not protected by the ring buffer.
638 *
639 * The content of events may become garbage if we allow other processes to
640 * consume these events concurrently:
641 * A) the page of the consumed events may become a normal page
642 * (not a reader page) in the ring buffer, and this page will be rewritten
643 * by the events producer.
644 * B) The page of the consumed events may become a page for splice_read,
645 * and this page will be returned to the system.
646 *
647 * These primitives allow multiple processes to access different CPU ring
648 * buffers concurrently.
649 *
650 * These primitives don't distinguish read-only and read-consume access.
651 * Multiple read-only accesses are also serialized.
652 */
653
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657
658 static inline void trace_access_lock(int cpu)
659 {
660 if (cpu == RING_BUFFER_ALL_CPUS) {
661 /* gain it for accessing the whole ring buffer. */
662 down_write(&all_cpu_access_lock);
663 } else {
664 /* gain it for accessing a cpu ring buffer. */
665
666 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667 down_read(&all_cpu_access_lock);
668
669 /* Secondly block other access to this @cpu ring buffer. */
670 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671 }
672 }
673
674 static inline void trace_access_unlock(int cpu)
675 {
676 if (cpu == RING_BUFFER_ALL_CPUS) {
677 up_write(&all_cpu_access_lock);
678 } else {
679 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680 up_read(&all_cpu_access_lock);
681 }
682 }
683
684 static inline void trace_access_lock_init(void)
685 {
686 int cpu;
687
688 for_each_possible_cpu(cpu)
689 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691
692 #else
693
694 static DEFINE_MUTEX(access_lock);
695
696 static inline void trace_access_lock(int cpu)
697 {
698 (void)cpu;
699 mutex_lock(&access_lock);
700 }
701
702 static inline void trace_access_unlock(int cpu)
703 {
704 (void)cpu;
705 mutex_unlock(&access_lock);
706 }
707
708 static inline void trace_access_lock_init(void)
709 {
710 }
711
712 #endif
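
/*
 * Typical usage sketch (illustration only): a reader consuming one CPU's
 * buffer brackets the access like
 *
 *	trace_access_lock(cpu);
 *	... consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS instead takes the access exclusively
 * across every per-cpu buffer.
 */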
713
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716 unsigned long flags,
717 int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719 struct ring_buffer *buffer,
720 unsigned long flags,
721 int skip, int pc, struct pt_regs *regs);
722
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725 unsigned long flags,
726 int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730 struct ring_buffer *buffer,
731 unsigned long flags,
732 int skip, int pc, struct pt_regs *regs)
733 {
734 }
735
736 #endif
737
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740 int type, unsigned long flags, int pc)
741 {
742 struct trace_entry *ent = ring_buffer_event_data(event);
743
744 tracing_generic_entry_update(ent, flags, pc);
745 ent->type = type;
746 }
747
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750 int type,
751 unsigned long len,
752 unsigned long flags, int pc)
753 {
754 struct ring_buffer_event *event;
755
756 event = ring_buffer_lock_reserve(buffer, len);
757 if (event != NULL)
758 trace_event_setup(event, type, flags, pc);
759
760 return event;
761 }
762
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765 if (tr->trace_buffer.buffer)
766 ring_buffer_record_on(tr->trace_buffer.buffer);
767 /*
768 * This flag is looked at when buffers haven't been allocated
769 * yet, or by some tracers (like irqsoff), that just want to
770 * know if the ring buffer has been disabled, but it can handle
771 * races where it gets disabled but we still do a record.
772 * As the check is in the fast path of the tracers, it is more
773 * important to be fast than accurate.
774 */
775 tr->buffer_disabled = 0;
776 /* Make the flag seen by readers */
777 smp_wmb();
778 }
779
780 /**
781 * tracing_on - enable tracing buffers
782 *
783 * This function enables tracing buffers that may have been
784 * disabled with tracing_off.
785 */
786 void tracing_on(void)
787 {
788 tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791
792
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796 __this_cpu_write(trace_taskinfo_save, true);
797
798 /* If this is the temp buffer, we need to commit fully */
799 if (this_cpu_read(trace_buffered_event) == event) {
800 /* Length is in event->array[0] */
801 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802 /* Release the temp buffer */
803 this_cpu_dec(trace_buffered_event_cnt);
804 } else
805 ring_buffer_unlock_commit(buffer, event);
806 }
807
808 /**
809 * __trace_puts - write a constant string into the trace buffer.
810 * @ip: The address of the caller
811 * @str: The constant string to write
812 * @size: The size of the string.
813 */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816 struct ring_buffer_event *event;
817 struct ring_buffer *buffer;
818 struct print_entry *entry;
819 unsigned long irq_flags;
820 int alloc;
821 int pc;
822
823 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824 return 0;
825
826 pc = preempt_count();
827
828 if (unlikely(tracing_selftest_running || tracing_disabled))
829 return 0;
830
831 alloc = sizeof(*entry) + size + 2; /* possible \n added */
832
833 local_save_flags(irq_flags);
834 buffer = global_trace.trace_buffer.buffer;
835 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
836 irq_flags, pc);
837 if (!event)
838 return 0;
839
840 entry = ring_buffer_event_data(event);
841 entry->ip = ip;
842
843 memcpy(&entry->buf, str, size);
844
845 /* Add a newline if necessary */
846 if (entry->buf[size - 1] != '\n') {
847 entry->buf[size] = '\n';
848 entry->buf[size + 1] = '\0';
849 } else
850 entry->buf[size] = '\0';
851
852 __buffer_unlock_commit(buffer, event);
853 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854
855 return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858
859 /**
860 * __trace_bputs - write the pointer to a constant string into trace buffer
861 * @ip: The address of the caller
862 * @str: The constant string to write to the buffer
863 */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866 struct ring_buffer_event *event;
867 struct ring_buffer *buffer;
868 struct bputs_entry *entry;
869 unsigned long irq_flags;
870 int size = sizeof(struct bputs_entry);
871 int pc;
872
873 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874 return 0;
875
876 pc = preempt_count();
877
878 if (unlikely(tracing_selftest_running || tracing_disabled))
879 return 0;
880
881 local_save_flags(irq_flags);
882 buffer = global_trace.trace_buffer.buffer;
883 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884 irq_flags, pc);
885 if (!event)
886 return 0;
887
888 entry = ring_buffer_event_data(event);
889 entry->ip = ip;
890 entry->str = str;
891
892 __buffer_unlock_commit(buffer, event);
893 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894
895 return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
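
/*
 * Usage note (sketch, assuming the trace_puts() wrapper in the tracing
 * headers): callers normally invoke trace_puts(str), which passes _THIS_IP_
 * for @ip and picks __trace_bputs() for compile-time constant strings
 * (recording just the pointer) or __trace_puts() otherwise (copying the
 * string into the ring buffer).
 */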
898
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
901 {
902 struct tracer *tracer = tr->current_trace;
903 unsigned long flags;
904
905 if (in_nmi()) {
906 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907 internal_trace_puts("*** snapshot is being ignored ***\n");
908 return;
909 }
910
911 if (!tr->allocated_snapshot) {
912 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913 internal_trace_puts("*** stopping trace here! ***\n");
914 tracing_off();
915 return;
916 }
917
918 /* Note, snapshot can not be used when the tracer uses it */
919 if (tracer->use_max_tr) {
920 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922 return;
923 }
924
925 local_irq_save(flags);
926 update_max_tr(tr, current, smp_processor_id(), cond_data);
927 local_irq_restore(flags);
928 }
929
930 void tracing_snapshot_instance(struct trace_array *tr)
931 {
932 tracing_snapshot_instance_cond(tr, NULL);
933 }
934
935 /**
936 * tracing_snapshot - take a snapshot of the current buffer.
937 *
938 * This causes a swap between the snapshot buffer and the current live
939 * tracing buffer. You can use this to take snapshots of the live
940 * trace when some condition is triggered, but continue to trace.
941 *
942 * Note, make sure to allocate the snapshot either with
943 * tracing_snapshot_alloc(), or manually by doing:
944 * echo 1 > /sys/kernel/debug/tracing/snapshot
945 *
946 * If the snapshot buffer is not allocated, it will stop tracing.
947 * Basically making a permanent snapshot.
948 */
949 void tracing_snapshot(void)
950 {
951 struct trace_array *tr = &global_trace;
952
953 tracing_snapshot_instance(tr);
954 }
955 EXPORT_SYMBOL_GPL(tracing_snapshot);
956
957 /**
958 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
959 * @tr: The tracing instance to snapshot
960 * @cond_data: The data to be tested conditionally, and possibly saved
961 *
962 * This is the same as tracing_snapshot() except that the snapshot is
963 * conditional - the snapshot will only happen if the
964 * cond_snapshot.update() implementation receiving the cond_data
965 * returns true, which means that the trace array's cond_snapshot
966 * update() operation used the cond_data to determine whether the
967 * snapshot should be taken, and if it was, presumably saved it along
968 * with the snapshot.
969 */
970 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
971 {
972 tracing_snapshot_instance_cond(tr, cond_data);
973 }
974 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
975
976 /**
977 * tracing_snapshot_cond_data - get the user data associated with a snapshot
978 * @tr: The tracing instance
979 *
980 * When the user enables a conditional snapshot using
981 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
982 * with the snapshot. This accessor is used to retrieve it.
983 *
984 * Should not be called from cond_snapshot.update(), since it takes
985 * the tr->max_lock lock, which the code calling
986 * cond_snapshot.update() has already taken.
987 *
988 * Returns the cond_data associated with the trace array's snapshot.
989 */
990 void *tracing_cond_snapshot_data(struct trace_array *tr)
991 {
992 void *cond_data = NULL;
993
994 arch_spin_lock(&tr->max_lock);
995
996 if (tr->cond_snapshot)
997 cond_data = tr->cond_snapshot->cond_data;
998
999 arch_spin_unlock(&tr->max_lock);
1000
1001 return cond_data;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1004
1005 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1006 struct trace_buffer *size_buf, int cpu_id);
1007 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1008
1009 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1010 {
1011 int ret;
1012
1013 if (!tr->allocated_snapshot) {
1014
1015 /* allocate spare buffer */
1016 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1017 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1018 if (ret < 0)
1019 return ret;
1020
1021 tr->allocated_snapshot = true;
1022 }
1023
1024 return 0;
1025 }
1026
1027 static void free_snapshot(struct trace_array *tr)
1028 {
1029 /*
1030 * We don't free the ring buffer; instead, we resize it because
1031 * the max_tr ring buffer has some state (e.g. ring->clock) and
1032 * we want to preserve it.
1033 */
1034 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1035 set_buffer_entries(&tr->max_buffer, 1);
1036 tracing_reset_online_cpus(&tr->max_buffer);
1037 tr->allocated_snapshot = false;
1038 }
1039
1040 /**
1041 * tracing_alloc_snapshot - allocate snapshot buffer.
1042 *
1043 * This only allocates the snapshot buffer if it isn't already
1044 * allocated - it doesn't also take a snapshot.
1045 *
1046 * This is meant to be used in cases where the snapshot buffer needs
1047 * to be set up for events that can't sleep but need to be able to
1048 * trigger a snapshot.
1049 */
1050 int tracing_alloc_snapshot(void)
1051 {
1052 struct trace_array *tr = &global_trace;
1053 int ret;
1054
1055 ret = tracing_alloc_snapshot_instance(tr);
1056 WARN_ON(ret < 0);
1057
1058 return ret;
1059 }
1060 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1061
1062 /**
1063 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1064 *
1065 * This is similar to tracing_snapshot(), but it will allocate the
1066 * snapshot buffer if it isn't already allocated. Use this only
1067 * where it is safe to sleep, as the allocation may sleep.
1068 *
1069 * This causes a swap between the snapshot buffer and the current live
1070 * tracing buffer. You can use this to take snapshots of the live
1071 * trace when some condition is triggered, but continue to trace.
1072 */
1073 void tracing_snapshot_alloc(void)
1074 {
1075 int ret;
1076
1077 ret = tracing_alloc_snapshot();
1078 if (ret < 0)
1079 return;
1080
1081 tracing_snapshot();
1082 }
1083 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1084
1085 /**
1086 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1087 * @tr: The tracing instance
1088 * @cond_data: User data to associate with the snapshot
1089 * @update: Implementation of the cond_snapshot update function
1090 *
1091 * Check whether the conditional snapshot for the given instance has
1092 * already been enabled, or if the current tracer is already using a
1093 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1094 * save the cond_data and update function inside.
1095 *
1096 * Returns 0 if successful, error otherwise.
1097 */
1098 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1099 cond_update_fn_t update)
1100 {
1101 struct cond_snapshot *cond_snapshot;
1102 int ret = 0;
1103
1104 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1105 if (!cond_snapshot)
1106 return -ENOMEM;
1107
1108 cond_snapshot->cond_data = cond_data;
1109 cond_snapshot->update = update;
1110
1111 mutex_lock(&trace_types_lock);
1112
1113 ret = tracing_alloc_snapshot_instance(tr);
1114 if (ret)
1115 goto fail_unlock;
1116
1117 if (tr->current_trace->use_max_tr) {
1118 ret = -EBUSY;
1119 goto fail_unlock;
1120 }
1121
1122 /*
1123 * The cond_snapshot can only change to NULL without the
1124 * trace_types_lock. We don't care if we race with it going
1125 * to NULL, but we want to make sure that it's not set to
1126 * something other than NULL when we get here, which we can
1127 * do safely with only holding the trace_types_lock and not
1128 * having to take the max_lock.
1129 */
1130 if (tr->cond_snapshot) {
1131 ret = -EBUSY;
1132 goto fail_unlock;
1133 }
1134
1135 arch_spin_lock(&tr->max_lock);
1136 tr->cond_snapshot = cond_snapshot;
1137 arch_spin_unlock(&tr->max_lock);
1138
1139 mutex_unlock(&trace_types_lock);
1140
1141 return ret;
1142
1143 fail_unlock:
1144 mutex_unlock(&trace_types_lock);
1145 kfree(cond_snapshot);
1146 return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1149
1150 /**
1151 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1152 * @tr: The tracing instance
1153 *
1154 * Check whether the conditional snapshot for the given instance is
1155 * enabled; if so, free the cond_snapshot associated with it,
1156 * otherwise return -EINVAL.
1157 *
1158 * Returns 0 if successful, error otherwise.
1159 */
1160 int tracing_snapshot_cond_disable(struct trace_array *tr)
1161 {
1162 int ret = 0;
1163
1164 arch_spin_lock(&tr->max_lock);
1165
1166 if (!tr->cond_snapshot)
1167 ret = -EINVAL;
1168 else {
1169 kfree(tr->cond_snapshot);
1170 tr->cond_snapshot = NULL;
1171 }
1172
1173 arch_spin_unlock(&tr->max_lock);
1174
1175 return ret;
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1178 #else
1179 void tracing_snapshot(void)
1180 {
1181 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_snapshot);
1184 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1185 {
1186 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1187 }
1188 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1189 int tracing_alloc_snapshot(void)
1190 {
1191 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1192 return -ENODEV;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1195 void tracing_snapshot_alloc(void)
1196 {
1197 /* Give warning */
1198 tracing_snapshot();
1199 }
1200 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1201 void *tracing_cond_snapshot_data(struct trace_array *tr)
1202 {
1203 return NULL;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1207 {
1208 return -ENODEV;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 return false;
1214 }
1215 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1216 #endif /* CONFIG_TRACER_SNAPSHOT */
1217
1218 void tracer_tracing_off(struct trace_array *tr)
1219 {
1220 if (tr->trace_buffer.buffer)
1221 ring_buffer_record_off(tr->trace_buffer.buffer);
1222 /*
1223 * This flag is looked at when buffers haven't been allocated
1224 * yet, or by some tracers (like irqsoff), that just want to
1225 * know if the ring buffer has been disabled, but it can handle
1226 * races where it gets disabled but we still do a record.
1227 * As the check is in the fast path of the tracers, it is more
1228 * important to be fast than accurate.
1229 */
1230 tr->buffer_disabled = 1;
1231 /* Make the flag seen by readers */
1232 smp_wmb();
1233 }
1234
1235 /**
1236 * tracing_off - turn off tracing buffers
1237 *
1238 * This function stops the tracing buffers from recording data.
1239 * It does not disable any overhead the tracers themselves may
1240 * be causing. This function simply causes all recording to
1241 * the ring buffers to fail.
1242 */
1243 void tracing_off(void)
1244 {
1245 tracer_tracing_off(&global_trace);
1246 }
1247 EXPORT_SYMBOL_GPL(tracing_off);
1248
1249 void disable_trace_on_warning(void)
1250 {
1251 if (__disable_trace_on_warning)
1252 tracing_off();
1253 }
1254
1255 /**
1256 * tracer_tracing_is_on - show real state of ring buffer enabled
1257 * @tr : the trace array to know if ring buffer is enabled
1258 *
1259 * Shows real state of the ring buffer if it is enabled or not.
1260 */
1261 bool tracer_tracing_is_on(struct trace_array *tr)
1262 {
1263 if (tr->trace_buffer.buffer)
1264 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1265 return !tr->buffer_disabled;
1266 }
1267
1268 /**
1269 * tracing_is_on - show state of ring buffers enabled
1270 */
1271 int tracing_is_on(void)
1272 {
1273 return tracer_tracing_is_on(&global_trace);
1274 }
1275 EXPORT_SYMBOL_GPL(tracing_is_on);
1276
1277 static int __init set_buf_size(char *str)
1278 {
1279 unsigned long buf_size;
1280
1281 if (!str)
1282 return 0;
1283 buf_size = memparse(str, &str);
1284 /* nr_entries can not be zero */
1285 if (buf_size == 0)
1286 return 0;
1287 trace_buf_size = buf_size;
1288 return 1;
1289 }
1290 __setup("trace_buf_size=", set_buf_size);
1291
1292 static int __init set_tracing_thresh(char *str)
1293 {
1294 unsigned long threshold;
1295 int ret;
1296
1297 if (!str)
1298 return 0;
1299 ret = kstrtoul(str, 0, &threshold);
1300 if (ret < 0)
1301 return 0;
1302 tracing_thresh = threshold * 1000;
1303 return 1;
1304 }
1305 __setup("tracing_thresh=", set_tracing_thresh);
1306
1307 unsigned long nsecs_to_usecs(unsigned long nsecs)
1308 {
1309 return nsecs / 1000;
1310 }
1311
1312 /*
1313 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1314 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1315 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1316 * of strings in the order that the evals (enum) were defined.
1317 */
1318 #undef C
1319 #define C(a, b) b
1320
1321 /* These must match the bit positions in trace_iterator_flags */
1322 static const char *trace_options[] = {
1323 TRACE_FLAGS
1324 NULL
1325 };
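
/*
 * Illustration (hypothetical entries, the real list lives in trace.h): if
 * TRACE_FLAGS were
 *
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * then the "C(a, b) b" definition above expands trace_options[] to
 *
 *	{ "print-parent", "sym-offset", NULL };
 *
 * while trace.h re-expands the same list with a different C() definition to
 * produce the matching TRACE_ITER_* bits, keeping strings and bits in sync.
 */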
1326
1327 static struct {
1328 u64 (*func)(void);
1329 const char *name;
1330 int in_ns; /* is this clock in nanoseconds? */
1331 } trace_clocks[] = {
1332 { trace_clock_local, "local", 1 },
1333 { trace_clock_global, "global", 1 },
1334 { trace_clock_counter, "counter", 0 },
1335 { trace_clock_jiffies, "uptime", 0 },
1336 { trace_clock, "perf", 1 },
1337 { ktime_get_mono_fast_ns, "mono", 1 },
1338 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1339 { ktime_get_boot_fast_ns, "boot", 1 },
1340 ARCH_TRACE_CLOCKS
1341 };
1342
1343 bool trace_clock_in_ns(struct trace_array *tr)
1344 {
1345 if (trace_clocks[tr->clock_id].in_ns)
1346 return true;
1347
1348 return false;
1349 }
1350
1351 /*
1352 * trace_parser_get_init - gets the buffer for trace parser
1353 */
1354 int trace_parser_get_init(struct trace_parser *parser, int size)
1355 {
1356 memset(parser, 0, sizeof(*parser));
1357
1358 parser->buffer = kmalloc(size, GFP_KERNEL);
1359 if (!parser->buffer)
1360 return 1;
1361
1362 parser->size = size;
1363 return 0;
1364 }
1365
1366 /*
1367 * trace_parser_put - frees the buffer for trace parser
1368 */
1369 void trace_parser_put(struct trace_parser *parser)
1370 {
1371 kfree(parser->buffer);
1372 parser->buffer = NULL;
1373 }
1374
1375 /*
1376 * trace_get_user - reads the user input string separated by space
1377 * (matched by isspace(ch))
1378 *
1379 * For each string found the 'struct trace_parser' is updated,
1380 * and the function returns.
1381 *
1382 * Returns number of bytes read.
1383 *
1384 * See kernel/trace/trace.h for 'struct trace_parser' details.
1385 */
1386 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1387 size_t cnt, loff_t *ppos)
1388 {
1389 char ch;
1390 size_t read = 0;
1391 ssize_t ret;
1392
1393 if (!*ppos)
1394 trace_parser_clear(parser);
1395
1396 ret = get_user(ch, ubuf++);
1397 if (ret)
1398 goto out;
1399
1400 read++;
1401 cnt--;
1402
1403 /*
1404 * If the parser has not finished with the last write,
1405 * continue reading the user input without skipping spaces.
1406 */
1407 if (!parser->cont) {
1408 /* skip white space */
1409 while (cnt && isspace(ch)) {
1410 ret = get_user(ch, ubuf++);
1411 if (ret)
1412 goto out;
1413 read++;
1414 cnt--;
1415 }
1416
1417 parser->idx = 0;
1418
1419 /* only spaces were written */
1420 if (isspace(ch) || !ch) {
1421 *ppos += read;
1422 ret = read;
1423 goto out;
1424 }
1425 }
1426
1427 /* read the non-space input */
1428 while (cnt && !isspace(ch) && ch) {
1429 if (parser->idx < parser->size - 1)
1430 parser->buffer[parser->idx++] = ch;
1431 else {
1432 ret = -EINVAL;
1433 goto out;
1434 }
1435 ret = get_user(ch, ubuf++);
1436 if (ret)
1437 goto out;
1438 read++;
1439 cnt--;
1440 }
1441
1442 /* We either got finished input or we have to wait for another call. */
1443 if (isspace(ch) || !ch) {
1444 parser->buffer[parser->idx] = 0;
1445 parser->cont = false;
1446 } else if (parser->idx < parser->size - 1) {
1447 parser->cont = true;
1448 parser->buffer[parser->idx++] = ch;
1449 /* Make sure the parsed string always terminates with '\0'. */
1450 parser->buffer[parser->idx] = 0;
1451 } else {
1452 ret = -EINVAL;
1453 goto out;
1454 }
1455
1456 *ppos += read;
1457 ret = read;
1458
1459 out:
1460 return ret;
1461 }
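
/*
 * Example (illustration only): for a user buffer containing "foo bar\n",
 * the first call fills parser->buffer with "foo" and returns the number of
 * bytes it consumed (including the space it stopped on); calling again with
 * the advanced ubuf/cnt yields "bar". A token that does not fit in
 * parser->size - 1 bytes makes the function return -EINVAL.
 */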
1462
1463 /* TODO add a seq_buf_to_buffer() */
1464 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1465 {
1466 int len;
1467
1468 if (trace_seq_used(s) <= s->seq.readpos)
1469 return -EBUSY;
1470
1471 len = trace_seq_used(s) - s->seq.readpos;
1472 if (cnt > len)
1473 cnt = len;
1474 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1475
1476 s->seq.readpos += cnt;
1477 return cnt;
1478 }
1479
1480 unsigned long __read_mostly tracing_thresh;
1481
1482 #ifdef CONFIG_TRACER_MAX_TRACE
1483 /*
1484 * Copy the new maximum trace into the separate maximum-trace
1485 * structure. (this way the maximum trace is permanently saved,
1486 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1487 */
1488 static void
1489 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1490 {
1491 struct trace_buffer *trace_buf = &tr->trace_buffer;
1492 struct trace_buffer *max_buf = &tr->max_buffer;
1493 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1494 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1495
1496 max_buf->cpu = cpu;
1497 max_buf->time_start = data->preempt_timestamp;
1498
1499 max_data->saved_latency = tr->max_latency;
1500 max_data->critical_start = data->critical_start;
1501 max_data->critical_end = data->critical_end;
1502
1503 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1504 max_data->pid = tsk->pid;
1505 /*
1506 * If tsk == current, then use current_uid(), as that does not use
1507 * RCU. The irq tracer can be called out of RCU scope.
1508 */
1509 if (tsk == current)
1510 max_data->uid = current_uid();
1511 else
1512 max_data->uid = task_uid(tsk);
1513
1514 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1515 max_data->policy = tsk->policy;
1516 max_data->rt_priority = tsk->rt_priority;
1517
1518 /* record this task's comm */
1519 tracing_record_cmdline(tsk);
1520 }
1521
1522 /**
1523 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1524 * @tr: tracer
1525 * @tsk: the task with the latency
1526 * @cpu: The cpu that initiated the trace.
1527 * @cond_data: User data associated with a conditional snapshot
1528 *
1529 * Flip the buffers between the @tr and the max_tr and record information
1530 * about which task was the cause of this latency.
1531 */
1532 void
1533 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1534 void *cond_data)
1535 {
1536 if (tr->stop_count)
1537 return;
1538
1539 WARN_ON_ONCE(!irqs_disabled());
1540
1541 if (!tr->allocated_snapshot) {
1542 /* Only the nop tracer should hit this when disabling */
1543 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1544 return;
1545 }
1546
1547 arch_spin_lock(&tr->max_lock);
1548
1549 /* Inherit the recordable setting from trace_buffer */
1550 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1551 ring_buffer_record_on(tr->max_buffer.buffer);
1552 else
1553 ring_buffer_record_off(tr->max_buffer.buffer);
1554
1555 #ifdef CONFIG_TRACER_SNAPSHOT
1556 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1557 goto out_unlock;
1558 #endif
1559 swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1560
1561 __update_max_tr(tr, tsk, cpu);
1562
1563 out_unlock:
1564 arch_spin_unlock(&tr->max_lock);
1565 }
1566
1567 /**
1568 * update_max_tr_single - only copy one trace over, and reset the rest
1569 * @tr: tracer
1570 * @tsk: task with the latency
1571 * @cpu: the cpu of the buffer to copy.
1572 *
1573 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1574 */
1575 void
1576 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1577 {
1578 int ret;
1579
1580 if (tr->stop_count)
1581 return;
1582
1583 WARN_ON_ONCE(!irqs_disabled());
1584 if (!tr->allocated_snapshot) {
1585 /* Only the nop tracer should hit this when disabling */
1586 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1587 return;
1588 }
1589
1590 arch_spin_lock(&tr->max_lock);
1591
1592 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1593
1594 if (ret == -EBUSY) {
1595 /*
1596 * We failed to swap the buffer due to a commit taking
1597 * place on this CPU. We fail to record, but we reset
1598 * the max trace buffer (no one writes directly to it)
1599 * and flag that it failed.
1600 */
1601 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1602 "Failed to swap buffers due to commit in progress\n");
1603 }
1604
1605 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1606
1607 __update_max_tr(tr, tsk, cpu);
1608 arch_spin_unlock(&tr->max_lock);
1609 }
1610 #endif /* CONFIG_TRACER_MAX_TRACE */
1611
1612 static int wait_on_pipe(struct trace_iterator *iter, int full)
1613 {
1614 /* Iterators are static, they should be filled or empty */
1615 if (trace_buffer_iter(iter, iter->cpu_file))
1616 return 0;
1617
1618 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1619 full);
1620 }
1621
1622 #ifdef CONFIG_FTRACE_STARTUP_TEST
1623 static bool selftests_can_run;
1624
1625 struct trace_selftests {
1626 struct list_head list;
1627 struct tracer *type;
1628 };
1629
1630 static LIST_HEAD(postponed_selftests);
1631
1632 static int save_selftest(struct tracer *type)
1633 {
1634 struct trace_selftests *selftest;
1635
1636 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1637 if (!selftest)
1638 return -ENOMEM;
1639
1640 selftest->type = type;
1641 list_add(&selftest->list, &postponed_selftests);
1642 return 0;
1643 }
1644
1645 static int run_tracer_selftest(struct tracer *type)
1646 {
1647 struct trace_array *tr = &global_trace;
1648 struct tracer *saved_tracer = tr->current_trace;
1649 int ret;
1650
1651 if (!type->selftest || tracing_selftest_disabled)
1652 return 0;
1653
1654 /*
1655 * If a tracer registers early in boot up (before scheduling is
1656 * initialized and such), then do not run its selftests yet.
1657 * Instead, run them a little later in the boot process.
1658 */
1659 if (!selftests_can_run)
1660 return save_selftest(type);
1661
1662 /*
1663 * Run a selftest on this tracer.
1664 * Here we reset the trace buffer, and set the current
1665 * tracer to be this tracer. The tracer can then run some
1666 * internal tracing to verify that everything is in order.
1667 * If we fail, we do not register this tracer.
1668 */
1669 tracing_reset_online_cpus(&tr->trace_buffer);
1670
1671 tr->current_trace = type;
1672
1673 #ifdef CONFIG_TRACER_MAX_TRACE
1674 if (type->use_max_tr) {
1675 /* If we expanded the buffers, make sure the max is expanded too */
1676 if (ring_buffer_expanded)
1677 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1678 RING_BUFFER_ALL_CPUS);
1679 tr->allocated_snapshot = true;
1680 }
1681 #endif
1682
1683 /* the test is responsible for initializing and enabling */
1684 pr_info("Testing tracer %s: ", type->name);
1685 ret = type->selftest(type, tr);
1686 /* the test is responsible for resetting too */
1687 tr->current_trace = saved_tracer;
1688 if (ret) {
1689 printk(KERN_CONT "FAILED!\n");
1690 /* Add the warning after printing 'FAILED' */
1691 WARN_ON(1);
1692 return -1;
1693 }
1694 /* Only reset on passing, to avoid touching corrupted buffers */
1695 tracing_reset_online_cpus(&tr->trace_buffer);
1696
1697 #ifdef CONFIG_TRACER_MAX_TRACE
1698 if (type->use_max_tr) {
1699 tr->allocated_snapshot = false;
1700
1701 /* Shrink the max buffer again */
1702 if (ring_buffer_expanded)
1703 ring_buffer_resize(tr->max_buffer.buffer, 1,
1704 RING_BUFFER_ALL_CPUS);
1705 }
1706 #endif
1707
1708 printk(KERN_CONT "PASSED\n");
1709 return 0;
1710 }
1711
1712 static __init int init_trace_selftests(void)
1713 {
1714 struct trace_selftests *p, *n;
1715 struct tracer *t, **last;
1716 int ret;
1717
1718 selftests_can_run = true;
1719
1720 mutex_lock(&trace_types_lock);
1721
1722 if (list_empty(&postponed_selftests))
1723 goto out;
1724
1725 pr_info("Running postponed tracer tests:\n");
1726
1727 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1728 ret = run_tracer_selftest(p->type);
1729 /* If the test fails, then warn and remove from available_tracers */
1730 if (ret < 0) {
1731 WARN(1, "tracer: %s failed selftest, disabling\n",
1732 p->type->name);
1733 last = &trace_types;
1734 for (t = trace_types; t; t = t->next) {
1735 if (t == p->type) {
1736 *last = t->next;
1737 break;
1738 }
1739 last = &t->next;
1740 }
1741 }
1742 list_del(&p->list);
1743 kfree(p);
1744 }
1745
1746 out:
1747 mutex_unlock(&trace_types_lock);
1748
1749 return 0;
1750 }
1751 core_initcall(init_trace_selftests);
1752 #else
1753 static inline int run_tracer_selftest(struct tracer *type)
1754 {
1755 return 0;
1756 }
1757 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1758
1759 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1760
1761 static void __init apply_trace_boot_options(void);
1762
1763 /**
1764 * register_tracer - register a tracer with the ftrace system.
1765 * @type: the plugin for the tracer
1766 *
1767 * Register a new plugin tracer.
1768 */
1769 int __init register_tracer(struct tracer *type)
1770 {
1771 struct tracer *t;
1772 int ret = 0;
1773
1774 if (!type->name) {
1775 pr_info("Tracer must have a name\n");
1776 return -1;
1777 }
1778
1779 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1780 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1781 return -1;
1782 }
1783
1784 mutex_lock(&trace_types_lock);
1785
1786 tracing_selftest_running = true;
1787
1788 for (t = trace_types; t; t = t->next) {
1789 if (strcmp(type->name, t->name) == 0) {
1790 /* already found */
1791 pr_info("Tracer %s already registered\n",
1792 type->name);
1793 ret = -1;
1794 goto out;
1795 }
1796 }
1797
1798 if (!type->set_flag)
1799 type->set_flag = &dummy_set_flag;
1800 if (!type->flags) {
1801 /* allocate a dummy tracer_flags */
1802 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1803 if (!type->flags) {
1804 ret = -ENOMEM;
1805 goto out;
1806 }
1807 type->flags->val = 0;
1808 type->flags->opts = dummy_tracer_opt;
1809 } else
1810 if (!type->flags->opts)
1811 type->flags->opts = dummy_tracer_opt;
1812
1813 /* store the tracer for __set_tracer_option */
1814 type->flags->trace = type;
1815
1816 ret = run_tracer_selftest(type);
1817 if (ret < 0)
1818 goto out;
1819
1820 type->next = trace_types;
1821 trace_types = type;
1822 add_tracer_options(&global_trace, type);
1823
1824 out:
1825 tracing_selftest_running = false;
1826 mutex_unlock(&trace_types_lock);
1827
1828 if (ret || !default_bootup_tracer)
1829 goto out_unlock;
1830
1831 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1832 goto out_unlock;
1833
1834 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1835 /* Do we want this tracer to start on bootup? */
1836 tracing_set_tracer(&global_trace, type->name);
1837 default_bootup_tracer = NULL;
1838
1839 apply_trace_boot_options();
1840
1841 /* disable other selftests, since this will break it. */
1842 tracing_selftest_disabled = true;
1843 #ifdef CONFIG_FTRACE_STARTUP_TEST
1844 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1845 type->name);
1846 #endif
1847
1848 out_unlock:
1849 return ret;
1850 }
1851
1852 void tracing_reset(struct trace_buffer *buf, int cpu)
1853 {
1854 struct ring_buffer *buffer = buf->buffer;
1855
1856 if (!buffer)
1857 return;
1858
1859 ring_buffer_record_disable(buffer);
1860
1861 /* Make sure all commits have finished */
1862 synchronize_rcu();
1863 ring_buffer_reset_cpu(buffer, cpu);
1864
1865 ring_buffer_record_enable(buffer);
1866 }
1867
1868 void tracing_reset_online_cpus(struct trace_buffer *buf)
1869 {
1870 struct ring_buffer *buffer = buf->buffer;
1871 int cpu;
1872
1873 if (!buffer)
1874 return;
1875
1876 ring_buffer_record_disable(buffer);
1877
1878 /* Make sure all commits have finished */
1879 synchronize_rcu();
1880
1881 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1882
1883 for_each_online_cpu(cpu)
1884 ring_buffer_reset_cpu(buffer, cpu);
1885
1886 ring_buffer_record_enable(buffer);
1887 }
1888
1889 /* Must have trace_types_lock held */
1890 void tracing_reset_all_online_cpus(void)
1891 {
1892 struct trace_array *tr;
1893
1894 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1895 if (!tr->clear_trace)
1896 continue;
1897 tr->clear_trace = false;
1898 tracing_reset_online_cpus(&tr->trace_buffer);
1899 #ifdef CONFIG_TRACER_MAX_TRACE
1900 tracing_reset_online_cpus(&tr->max_buffer);
1901 #endif
1902 }
1903 }
1904
1905 static int *tgid_map;
1906
1907 #define SAVED_CMDLINES_DEFAULT 128
1908 #define NO_CMDLINE_MAP UINT_MAX
1909 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1910 struct saved_cmdlines_buffer {
1911 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1912 unsigned *map_cmdline_to_pid;
1913 unsigned cmdline_num;
1914 int cmdline_idx;
1915 char *saved_cmdlines;
1916 };
1917 static struct saved_cmdlines_buffer *savedcmd;
1918
1919 /* temporarily disable recording */
1920 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1921
1922 static inline char *get_saved_cmdlines(int idx)
1923 {
1924 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1925 }
1926
1927 static inline void set_cmdline(int idx, const char *cmdline)
1928 {
1929 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1930 }
1931
1932 static int allocate_cmdlines_buffer(unsigned int val,
1933 struct saved_cmdlines_buffer *s)
1934 {
1935 s->map_cmdline_to_pid = kmalloc_array(val,
1936 sizeof(*s->map_cmdline_to_pid),
1937 GFP_KERNEL);
1938 if (!s->map_cmdline_to_pid)
1939 return -ENOMEM;
1940
1941 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1942 if (!s->saved_cmdlines) {
1943 kfree(s->map_cmdline_to_pid);
1944 return -ENOMEM;
1945 }
1946
1947 s->cmdline_idx = 0;
1948 s->cmdline_num = val;
1949 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1950 sizeof(s->map_pid_to_cmdline));
1951 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1952 val * sizeof(*s->map_cmdline_to_pid));
1953
1954 return 0;
1955 }
1956
1957 static int trace_create_savedcmd(void)
1958 {
1959 int ret;
1960
1961 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1962 if (!savedcmd)
1963 return -ENOMEM;
1964
1965 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1966 if (ret < 0) {
1967 kfree(savedcmd);
1968 savedcmd = NULL;
1969 return -ENOMEM;
1970 }
1971
1972 return 0;
1973 }
1974
1975 int is_tracing_stopped(void)
1976 {
1977 return global_trace.stop_count;
1978 }
1979
1980 /**
1981 * tracing_start - quick start of the tracer
1982 *
1983 * If tracing is enabled but was stopped by tracing_stop,
1984 * this will start the tracer back up.
1985 */
1986 void tracing_start(void)
1987 {
1988 struct ring_buffer *buffer;
1989 unsigned long flags;
1990
1991 if (tracing_disabled)
1992 return;
1993
1994 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1995 if (--global_trace.stop_count) {
1996 if (global_trace.stop_count < 0) {
1997 /* Someone screwed up their debugging */
1998 WARN_ON_ONCE(1);
1999 global_trace.stop_count = 0;
2000 }
2001 goto out;
2002 }
2003
2004 /* Prevent the buffers from switching */
2005 arch_spin_lock(&global_trace.max_lock);
2006
2007 buffer = global_trace.trace_buffer.buffer;
2008 if (buffer)
2009 ring_buffer_record_enable(buffer);
2010
2011 #ifdef CONFIG_TRACER_MAX_TRACE
2012 buffer = global_trace.max_buffer.buffer;
2013 if (buffer)
2014 ring_buffer_record_enable(buffer);
2015 #endif
2016
2017 arch_spin_unlock(&global_trace.max_lock);
2018
2019 out:
2020 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2021 }
2022
2023 static void tracing_start_tr(struct trace_array *tr)
2024 {
2025 struct ring_buffer *buffer;
2026 unsigned long flags;
2027
2028 if (tracing_disabled)
2029 return;
2030
2031 /* If global, we need to also start the max tracer */
2032 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2033 return tracing_start();
2034
2035 raw_spin_lock_irqsave(&tr->start_lock, flags);
2036
2037 if (--tr->stop_count) {
2038 if (tr->stop_count < 0) {
2039 /* Someone screwed up their debugging */
2040 WARN_ON_ONCE(1);
2041 tr->stop_count = 0;
2042 }
2043 goto out;
2044 }
2045
2046 buffer = tr->trace_buffer.buffer;
2047 if (buffer)
2048 ring_buffer_record_enable(buffer);
2049
2050 out:
2051 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2052 }
2053
2054 /**
2055 * tracing_stop - quick stop of the tracer
2056 *
2057 * Light weight way to stop tracing. Use in conjunction with
2058 * tracing_start.
2059 */
2060 void tracing_stop(void)
2061 {
2062 struct ring_buffer *buffer;
2063 unsigned long flags;
2064
2065 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2066 if (global_trace.stop_count++)
2067 goto out;
2068
2069 /* Prevent the buffers from switching */
2070 arch_spin_lock(&global_trace.max_lock);
2071
2072 buffer = global_trace.trace_buffer.buffer;
2073 if (buffer)
2074 ring_buffer_record_disable(buffer);
2075
2076 #ifdef CONFIG_TRACER_MAX_TRACE
2077 buffer = global_trace.max_buffer.buffer;
2078 if (buffer)
2079 ring_buffer_record_disable(buffer);
2080 #endif
2081
2082 arch_spin_unlock(&global_trace.max_lock);
2083
2084 out:
2085 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2086 }
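
/*
 * Illustrative only: tracing_stop()/tracing_start() nest through
 * stop_count, so a hypothetical caller that wants the buffers quiet
 * around a noisy section could do (do_noisy_work() is a made-up
 * placeholder):
 *
 *	tracing_stop();
 *	do_noisy_work();
 *	tracing_start();
 *
 * A nested tracing_stop() only bumps the count; recording resumes once
 * tracing_start() brings the count back down to zero.
 */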
2087
2088 static void tracing_stop_tr(struct trace_array *tr)
2089 {
2090 struct ring_buffer *buffer;
2091 unsigned long flags;
2092
2093 /* If global, we need to also stop the max tracer */
2094 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2095 return tracing_stop();
2096
2097 raw_spin_lock_irqsave(&tr->start_lock, flags);
2098 if (tr->stop_count++)
2099 goto out;
2100
2101 buffer = tr->trace_buffer.buffer;
2102 if (buffer)
2103 ring_buffer_record_disable(buffer);
2104
2105 out:
2106 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2107 }
2108
2109 static int trace_save_cmdline(struct task_struct *tsk)
2110 {
2111 unsigned pid, idx;
2112
2113 /* treat recording of idle task as a success */
2114 if (!tsk->pid)
2115 return 1;
2116
2117 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2118 return 0;
2119
2120 /*
2121 * It's not the end of the world if we don't get
2122 * the lock, but we also don't want to spin
2123 * nor do we want to disable interrupts,
2124 * so if we miss here, then better luck next time.
2125 */
2126 if (!arch_spin_trylock(&trace_cmdline_lock))
2127 return 0;
2128
2129 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2130 if (idx == NO_CMDLINE_MAP) {
2131 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2132
2133 /*
2134 * Check whether the cmdline buffer at idx has a pid
2135 * mapped. We are going to overwrite that entry so we
2136 * need to clear the map_pid_to_cmdline. Otherwise we
2137 * would read the new comm for the old pid.
2138 */
2139 pid = savedcmd->map_cmdline_to_pid[idx];
2140 if (pid != NO_CMDLINE_MAP)
2141 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2142
2143 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2144 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2145
2146 savedcmd->cmdline_idx = idx;
2147 }
2148
2149 set_cmdline(idx, tsk->comm);
2150
2151 arch_spin_unlock(&trace_cmdline_lock);
2152
2153 return 1;
2154 }
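
/*
 * Worked example with hypothetical values: if pid 1234 is saved into
 * slot idx = 7, then map_pid_to_cmdline[1234] == 7,
 * map_cmdline_to_pid[7] == 1234 and the comm sits at
 * saved_cmdlines[7 * TASK_COMM_LEN].  When slot 7 is later recycled for
 * another pid, map_pid_to_cmdline[1234] is reset to NO_CMDLINE_MAP
 * first, so a lookup of pid 1234 falls back to "<...>" instead of
 * showing the new owner's comm.
 */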
2155
2156 static void __trace_find_cmdline(int pid, char comm[])
2157 {
2158 unsigned map;
2159
2160 if (!pid) {
2161 strcpy(comm, "<idle>");
2162 return;
2163 }
2164
2165 if (WARN_ON_ONCE(pid < 0)) {
2166 strcpy(comm, "<XXX>");
2167 return;
2168 }
2169
2170 if (pid > PID_MAX_DEFAULT) {
2171 strcpy(comm, "<...>");
2172 return;
2173 }
2174
2175 map = savedcmd->map_pid_to_cmdline[pid];
2176 if (map != NO_CMDLINE_MAP)
2177 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2178 else
2179 strcpy(comm, "<...>");
2180 }
2181
2182 void trace_find_cmdline(int pid, char comm[])
2183 {
2184 preempt_disable();
2185 arch_spin_lock(&trace_cmdline_lock);
2186
2187 __trace_find_cmdline(pid, comm);
2188
2189 arch_spin_unlock(&trace_cmdline_lock);
2190 preempt_enable();
2191 }
2192
2193 int trace_find_tgid(int pid)
2194 {
2195 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2196 return 0;
2197
2198 return tgid_map[pid];
2199 }
2200
2201 static int trace_save_tgid(struct task_struct *tsk)
2202 {
2203 /* treat recording of idle task as a success */
2204 if (!tsk->pid)
2205 return 1;
2206
2207 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2208 return 0;
2209
2210 tgid_map[tsk->pid] = tsk->tgid;
2211 return 1;
2212 }
2213
2214 static bool tracing_record_taskinfo_skip(int flags)
2215 {
2216 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2217 return true;
2218 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2219 return true;
2220 if (!__this_cpu_read(trace_taskinfo_save))
2221 return true;
2222 return false;
2223 }
2224
2225 /**
2226 * tracing_record_taskinfo - record the task info of a task
2227 *
2228  * @task: task to record
2229  * @flags: TRACE_RECORD_CMDLINE for recording comm
2230  *         TRACE_RECORD_TGID for recording tgid
2231 */
2232 void tracing_record_taskinfo(struct task_struct *task, int flags)
2233 {
2234 bool done;
2235
2236 if (tracing_record_taskinfo_skip(flags))
2237 return;
2238
2239 /*
2240 * Record as much task information as possible. If some fail, continue
2241 * to try to record the others.
2242 */
2243 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2244 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2245
2246 /* If recording any information failed, retry again soon. */
2247 if (!done)
2248 return;
2249
2250 __this_cpu_write(trace_taskinfo_save, false);
2251 }
2252
2253 /**
2254 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2255 *
2256  * @prev: previous task during sched_switch
2257  * @next: next task during sched_switch
2258  * @flags: TRACE_RECORD_CMDLINE for recording comm
2259  *         TRACE_RECORD_TGID for recording tgid
2260 */
2261 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2262 struct task_struct *next, int flags)
2263 {
2264 bool done;
2265
2266 if (tracing_record_taskinfo_skip(flags))
2267 return;
2268
2269 /*
2270 * Record as much task information as possible. If some fail, continue
2271 * to try to record the others.
2272 */
2273 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2274 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2275 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2276 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2277
2278 /* If recording any information failed, retry again soon. */
2279 if (!done)
2280 return;
2281
2282 __this_cpu_write(trace_taskinfo_save, false);
2283 }
2284
2285 /* Helpers to record a specific task information */
2286 void tracing_record_cmdline(struct task_struct *task)
2287 {
2288 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2289 }
2290
2291 void tracing_record_tgid(struct task_struct *task)
2292 {
2293 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2294 }
2295
2296 /*
2297 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2298 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2299 * simplifies those functions and keeps them in sync.
2300 */
2301 enum print_line_t trace_handle_return(struct trace_seq *s)
2302 {
2303 return trace_seq_has_overflowed(s) ?
2304 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_handle_return);
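
/*
 * For illustration, a hypothetical trace_event output callback would
 * pair its trace_seq writes with trace_handle_return() (sketch only;
 * my_event_trace is a made-up name):
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */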
2307
2308 void
2309 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2310 int pc)
2311 {
2312 struct task_struct *tsk = current;
2313
2314 entry->preempt_count = pc & 0xff;
2315 entry->pid = (tsk) ? tsk->pid : 0;
2316 entry->flags =
2317 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2318 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2319 #else
2320 TRACE_FLAG_IRQS_NOSUPPORT |
2321 #endif
2322 		((pc & NMI_MASK) ? TRACE_FLAG_NMI : 0) |
2323 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2324 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2325 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2326 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2327 }
2328 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
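
/*
 * Hypothetical decode of the header filled in above: an event recorded
 * with interrupts disabled from hard interrupt context, with
 * preempt_count() == 0x10001, ends up with entry->preempt_count == 0x01
 * (only the low byte is kept) and entry->flags containing
 * TRACE_FLAG_IRQS_OFF | TRACE_FLAG_HARDIRQ, which the latency output
 * format later renders as the 'd' and 'h' columns.
 */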
2329
2330 struct ring_buffer_event *
2331 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2332 int type,
2333 unsigned long len,
2334 unsigned long flags, int pc)
2335 {
2336 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2337 }
2338
2339 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2340 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2341 static int trace_buffered_event_ref;
2342
2343 /**
2344 * trace_buffered_event_enable - enable buffering events
2345 *
2346 * When events are being filtered, it is quicker to use a temporary
2347 * buffer to write the event data into if there's a likely chance
2348 * that it will not be committed. The discard of the ring buffer
2349 * is not as fast as committing, and is much slower than copying
2350 * a commit.
2351 *
2352  * When an event is to be filtered, allocate per-cpu buffers to
2353  * write the event data into. If the event is then filtered and
2354  * discarded it is simply dropped; otherwise, the entire data is
2355  * committed in one shot.
2356 */
2357 void trace_buffered_event_enable(void)
2358 {
2359 struct ring_buffer_event *event;
2360 struct page *page;
2361 int cpu;
2362
2363 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2364
2365 if (trace_buffered_event_ref++)
2366 return;
2367
2368 for_each_tracing_cpu(cpu) {
2369 page = alloc_pages_node(cpu_to_node(cpu),
2370 GFP_KERNEL | __GFP_NORETRY, 0);
2371 if (!page)
2372 goto failed;
2373
2374 event = page_address(page);
2375 memset(event, 0, sizeof(*event));
2376
2377 per_cpu(trace_buffered_event, cpu) = event;
2378
2379 preempt_disable();
2380 if (cpu == smp_processor_id() &&
2381 this_cpu_read(trace_buffered_event) !=
2382 per_cpu(trace_buffered_event, cpu))
2383 WARN_ON_ONCE(1);
2384 preempt_enable();
2385 }
2386
2387 return;
2388 failed:
2389 trace_buffered_event_disable();
2390 }
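
/*
 * Sketch of the intended pairing (hypothetical caller): the filtering
 * code takes a reference when a filter is attached and drops it when
 * the filter is removed, both while holding event_mutex:
 *
 *	(when a filter is attached)
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *
 *	(later, when the filter is removed)
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */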
2391
2392 static void enable_trace_buffered_event(void *data)
2393 {
2394 /* Probably not needed, but do it anyway */
2395 smp_rmb();
2396 this_cpu_dec(trace_buffered_event_cnt);
2397 }
2398
2399 static void disable_trace_buffered_event(void *data)
2400 {
2401 this_cpu_inc(trace_buffered_event_cnt);
2402 }
2403
2404 /**
2405 * trace_buffered_event_disable - disable buffering events
2406 *
2407 * When a filter is removed, it is faster to not use the buffered
2408 * events, and to commit directly into the ring buffer. Free up
2409 * the temp buffers when there are no more users. This requires
2410 * special synchronization with current events.
2411 */
2412 void trace_buffered_event_disable(void)
2413 {
2414 int cpu;
2415
2416 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2417
2418 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2419 return;
2420
2421 if (--trace_buffered_event_ref)
2422 return;
2423
2424 preempt_disable();
2425 /* For each CPU, set the buffer as used. */
2426 smp_call_function_many(tracing_buffer_mask,
2427 disable_trace_buffered_event, NULL, 1);
2428 preempt_enable();
2429
2430 /* Wait for all current users to finish */
2431 synchronize_rcu();
2432
2433 for_each_tracing_cpu(cpu) {
2434 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2435 per_cpu(trace_buffered_event, cpu) = NULL;
2436 }
2437 /*
2438 * Make sure trace_buffered_event is NULL before clearing
2439 * trace_buffered_event_cnt.
2440 */
2441 smp_wmb();
2442
2443 preempt_disable();
2444 /* Do the work on each cpu */
2445 smp_call_function_many(tracing_buffer_mask,
2446 enable_trace_buffered_event, NULL, 1);
2447 preempt_enable();
2448 }
2449
2450 static struct ring_buffer *temp_buffer;
2451
2452 struct ring_buffer_event *
2453 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2454 struct trace_event_file *trace_file,
2455 int type, unsigned long len,
2456 unsigned long flags, int pc)
2457 {
2458 struct ring_buffer_event *entry;
2459 int val;
2460
2461 *current_rb = trace_file->tr->trace_buffer.buffer;
2462
2463 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2464 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2465 (entry = this_cpu_read(trace_buffered_event))) {
2466 /* Try to use the per cpu buffer first */
2467 val = this_cpu_inc_return(trace_buffered_event_cnt);
2468 if (val == 1) {
2469 trace_event_setup(entry, type, flags, pc);
2470 entry->array[0] = len;
2471 return entry;
2472 }
2473 this_cpu_dec(trace_buffered_event_cnt);
2474 }
2475
2476 entry = __trace_buffer_lock_reserve(*current_rb,
2477 type, len, flags, pc);
2478 /*
2479 	 * If tracing is off, but we have triggers enabled,
2480 	 * we still need to look at the event data. Use the temp_buffer
2481 	 * to store the trace event for the trigger to use. It's recursion
2482 	 * safe and will not be recorded anywhere.
2483 */
2484 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2485 *current_rb = temp_buffer;
2486 entry = __trace_buffer_lock_reserve(*current_rb,
2487 type, len, flags, pc);
2488 }
2489 return entry;
2490 }
2491 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
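
/*
 * For illustration, a hypothetical direct user pairs the reserve with a
 * commit and fills the payload in between (TRACE_MY_TYPE and struct
 * my_entry are made-up names, and trace_file/flags/pc are as in a
 * normal event probe; generated TRACE_EVENT() probes normally reach
 * this through trace_event_buffer_reserve()):
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						TRACE_MY_TYPE, sizeof(*entry),
 *						flags, pc);
 *	if (event) {
 *		entry = ring_buffer_event_data(event);
 *		entry->value = 42;
 *		event_trigger_unlock_commit(trace_file, buffer, event,
 *					    entry, flags, pc);
 *	}
 */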
2492
2493 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2494 static DEFINE_MUTEX(tracepoint_printk_mutex);
2495
2496 static void output_printk(struct trace_event_buffer *fbuffer)
2497 {
2498 struct trace_event_call *event_call;
2499 struct trace_event *event;
2500 unsigned long flags;
2501 struct trace_iterator *iter = tracepoint_print_iter;
2502
2503 /* We should never get here if iter is NULL */
2504 if (WARN_ON_ONCE(!iter))
2505 return;
2506
2507 event_call = fbuffer->trace_file->event_call;
2508 if (!event_call || !event_call->event.funcs ||
2509 !event_call->event.funcs->trace)
2510 return;
2511
2512 event = &fbuffer->trace_file->event_call->event;
2513
2514 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2515 trace_seq_init(&iter->seq);
2516 iter->ent = fbuffer->entry;
2517 event_call->event.funcs->trace(iter, 0, event);
2518 trace_seq_putc(&iter->seq, 0);
2519 printk("%s", iter->seq.buffer);
2520
2521 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2522 }
2523
2524 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2525 void __user *buffer, size_t *lenp,
2526 loff_t *ppos)
2527 {
2528 int save_tracepoint_printk;
2529 int ret;
2530
2531 mutex_lock(&tracepoint_printk_mutex);
2532 save_tracepoint_printk = tracepoint_printk;
2533
2534 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2535
2536 /*
2537 	 * This will force an early exit, as tracepoint_printk
2538 	 * is always zero when tracepoint_print_iter is not allocated.
2539 */
2540 if (!tracepoint_print_iter)
2541 tracepoint_printk = 0;
2542
2543 if (save_tracepoint_printk == tracepoint_printk)
2544 goto out;
2545
2546 if (tracepoint_printk)
2547 static_key_enable(&tracepoint_printk_key.key);
2548 else
2549 static_key_disable(&tracepoint_printk_key.key);
2550
2551 out:
2552 mutex_unlock(&tracepoint_printk_mutex);
2553
2554 return ret;
2555 }
2556
2557 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2558 {
2559 if (static_key_false(&tracepoint_printk_key.key))
2560 output_printk(fbuffer);
2561
2562 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2563 fbuffer->event, fbuffer->entry,
2564 fbuffer->flags, fbuffer->pc);
2565 }
2566 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2567
2568 /*
2569 * Skip 3:
2570 *
2571 * trace_buffer_unlock_commit_regs()
2572 * trace_event_buffer_commit()
2573 * trace_event_raw_event_xxx()
2574 */
2575 # define STACK_SKIP 3
2576
2577 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2578 struct ring_buffer *buffer,
2579 struct ring_buffer_event *event,
2580 unsigned long flags, int pc,
2581 struct pt_regs *regs)
2582 {
2583 __buffer_unlock_commit(buffer, event);
2584
2585 /*
2586 * If regs is not set, then skip the necessary functions.
2587 * Note, we can still get here via blktrace, wakeup tracer
2588 * and mmiotrace, but that's ok if they lose a function or
2589 * two. They are not that meaningful.
2590 */
2591 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2592 ftrace_trace_userstack(buffer, flags, pc);
2593 }
2594
2595 /*
2596 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2597 */
2598 void
2599 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2600 struct ring_buffer_event *event)
2601 {
2602 __buffer_unlock_commit(buffer, event);
2603 }
2604
2605 static void
2606 trace_process_export(struct trace_export *export,
2607 struct ring_buffer_event *event)
2608 {
2609 struct trace_entry *entry;
2610 unsigned int size = 0;
2611
2612 entry = ring_buffer_event_data(event);
2613 size = ring_buffer_event_length(event);
2614 export->write(export, entry, size);
2615 }
2616
2617 static DEFINE_MUTEX(ftrace_export_lock);
2618
2619 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2620
2621 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2622
2623 static inline void ftrace_exports_enable(void)
2624 {
2625 static_branch_enable(&ftrace_exports_enabled);
2626 }
2627
2628 static inline void ftrace_exports_disable(void)
2629 {
2630 static_branch_disable(&ftrace_exports_enabled);
2631 }
2632
2633 static void ftrace_exports(struct ring_buffer_event *event)
2634 {
2635 struct trace_export *export;
2636
2637 preempt_disable_notrace();
2638
2639 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2640 while (export) {
2641 trace_process_export(export, event);
2642 export = rcu_dereference_raw_notrace(export->next);
2643 }
2644
2645 preempt_enable_notrace();
2646 }
2647
2648 static inline void
2649 add_trace_export(struct trace_export **list, struct trace_export *export)
2650 {
2651 rcu_assign_pointer(export->next, *list);
2652 /*
2653 * We are entering export into the list but another
2654 * CPU might be walking that list. We need to make sure
2655 * the export->next pointer is valid before another CPU sees
2656 	 * the export pointer inserted into the list.
2657 */
2658 rcu_assign_pointer(*list, export);
2659 }
2660
2661 static inline int
2662 rm_trace_export(struct trace_export **list, struct trace_export *export)
2663 {
2664 struct trace_export **p;
2665
2666 for (p = list; *p != NULL; p = &(*p)->next)
2667 if (*p == export)
2668 break;
2669
2670 if (*p != export)
2671 return -1;
2672
2673 rcu_assign_pointer(*p, (*p)->next);
2674
2675 return 0;
2676 }
2677
2678 static inline void
2679 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2680 {
2681 if (*list == NULL)
2682 ftrace_exports_enable();
2683
2684 add_trace_export(list, export);
2685 }
2686
2687 static inline int
2688 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2689 {
2690 int ret;
2691
2692 ret = rm_trace_export(list, export);
2693 if (*list == NULL)
2694 ftrace_exports_disable();
2695
2696 return ret;
2697 }
2698
2699 int register_ftrace_export(struct trace_export *export)
2700 {
2701 if (WARN_ON_ONCE(!export->write))
2702 return -1;
2703
2704 mutex_lock(&ftrace_export_lock);
2705
2706 add_ftrace_export(&ftrace_exports_list, export);
2707
2708 mutex_unlock(&ftrace_export_lock);
2709
2710 return 0;
2711 }
2712 EXPORT_SYMBOL_GPL(register_ftrace_export);
2713
2714 int unregister_ftrace_export(struct trace_export *export)
2715 {
2716 int ret;
2717
2718 mutex_lock(&ftrace_export_lock);
2719
2720 ret = rm_ftrace_export(&ftrace_exports_list, export);
2721
2722 mutex_unlock(&ftrace_export_lock);
2723
2724 return ret;
2725 }
2726 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
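
/*
 * Usage sketch with made-up names, assuming the write() prototype from
 * <linux/trace.h> in this tree: an exporter supplies a callback that
 * receives each function-trace entry and its size, then registers and
 * later unregisters itself:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		...forward the raw entry to a transport of choice...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */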
2727
2728 void
2729 trace_function(struct trace_array *tr,
2730 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2731 int pc)
2732 {
2733 struct trace_event_call *call = &event_function;
2734 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2735 struct ring_buffer_event *event;
2736 struct ftrace_entry *entry;
2737
2738 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2739 flags, pc);
2740 if (!event)
2741 return;
2742 entry = ring_buffer_event_data(event);
2743 entry->ip = ip;
2744 entry->parent_ip = parent_ip;
2745
2746 if (!call_filter_check_discard(call, entry, buffer, event)) {
2747 if (static_branch_unlikely(&ftrace_exports_enabled))
2748 ftrace_exports(event);
2749 __buffer_unlock_commit(buffer, event);
2750 }
2751 }
2752
2753 #ifdef CONFIG_STACKTRACE
2754
2755 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2756 struct ftrace_stack {
2757 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2758 };
2759
2760 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2761 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2762
2763 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2764 unsigned long flags,
2765 int skip, int pc, struct pt_regs *regs)
2766 {
2767 struct trace_event_call *call = &event_kernel_stack;
2768 struct ring_buffer_event *event;
2769 struct stack_entry *entry;
2770 struct stack_trace trace;
2771 int use_stack;
2772 int size = FTRACE_STACK_ENTRIES;
2773
2774 trace.nr_entries = 0;
2775 trace.skip = skip;
2776
2777 /*
2778 	 * Add one for this function and the call to save_stack_trace().
2779 * If regs is set, then these functions will not be in the way.
2780 */
2781 #ifndef CONFIG_UNWINDER_ORC
2782 if (!regs)
2783 trace.skip++;
2784 #endif
2785
2786 /*
2787 * Since events can happen in NMIs there's no safe way to
2788 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2789 * or NMI comes in, it will just have to use the default
2790 	 * FTRACE_STACK_ENTRIES.
2791 */
2792 preempt_disable_notrace();
2793
2794 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2795 /*
2796 * We don't need any atomic variables, just a barrier.
2797 * If an interrupt comes in, we don't care, because it would
2798 * have exited and put the counter back to what we want.
2799 * We just need a barrier to keep gcc from moving things
2800 * around.
2801 */
2802 barrier();
2803 if (use_stack == 1) {
2804 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2805 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2806
2807 if (regs)
2808 save_stack_trace_regs(regs, &trace);
2809 else
2810 save_stack_trace(&trace);
2811
2812 if (trace.nr_entries > size)
2813 size = trace.nr_entries;
2814 } else
2815 /* From now on, use_stack is a boolean */
2816 use_stack = 0;
2817
2818 size *= sizeof(unsigned long);
2819
2820 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2821 sizeof(*entry) + size, flags, pc);
2822 if (!event)
2823 goto out;
2824 entry = ring_buffer_event_data(event);
2825
2826 memset(&entry->caller, 0, size);
2827
2828 if (use_stack)
2829 memcpy(&entry->caller, trace.entries,
2830 trace.nr_entries * sizeof(unsigned long));
2831 else {
2832 trace.max_entries = FTRACE_STACK_ENTRIES;
2833 trace.entries = entry->caller;
2834 if (regs)
2835 save_stack_trace_regs(regs, &trace);
2836 else
2837 save_stack_trace(&trace);
2838 }
2839
2840 entry->size = trace.nr_entries;
2841
2842 if (!call_filter_check_discard(call, entry, buffer, event))
2843 __buffer_unlock_commit(buffer, event);
2844
2845 out:
2846 /* Again, don't let gcc optimize things here */
2847 barrier();
2848 __this_cpu_dec(ftrace_stack_reserve);
2849 preempt_enable_notrace();
2850
2851 }
2852
2853 static inline void ftrace_trace_stack(struct trace_array *tr,
2854 struct ring_buffer *buffer,
2855 unsigned long flags,
2856 int skip, int pc, struct pt_regs *regs)
2857 {
2858 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2859 return;
2860
2861 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2862 }
2863
2864 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2865 int pc)
2866 {
2867 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2868
2869 if (rcu_is_watching()) {
2870 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2871 return;
2872 }
2873
2874 /*
2875 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2876 * but if the above rcu_is_watching() failed, then the NMI
2877 * triggered someplace critical, and rcu_irq_enter() should
2878 * not be called from NMI.
2879 */
2880 if (unlikely(in_nmi()))
2881 return;
2882
2883 rcu_irq_enter_irqson();
2884 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2885 rcu_irq_exit_irqson();
2886 }
2887
2888 /**
2889 * trace_dump_stack - record a stack back trace in the trace buffer
2890 * @skip: Number of functions to skip (helper handlers)
2891 */
2892 void trace_dump_stack(int skip)
2893 {
2894 unsigned long flags;
2895
2896 if (tracing_disabled || tracing_selftest_running)
2897 return;
2898
2899 local_save_flags(flags);
2900
2901 #ifndef CONFIG_UNWINDER_ORC
2902 /* Skip 1 to skip this function. */
2903 skip++;
2904 #endif
2905 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2906 flags, skip, preempt_count(), NULL);
2907 }
2908 EXPORT_SYMBOL_GPL(trace_dump_stack);
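
/*
 * For illustration: a debugging site elsewhere in the kernel can drop a
 * kernel backtrace into the trace buffer with trace_dump_stack(0); a
 * wrapper helper would pass 1 so that only its caller and above appear.
 */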
2909
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911
2912 void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915 struct trace_event_call *call = &event_user_stack;
2916 struct ring_buffer_event *event;
2917 struct userstack_entry *entry;
2918 struct stack_trace trace;
2919
2920 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2921 return;
2922
2923 /*
2924 	 * NMIs cannot handle page faults, even with fixups.
2925 	 * Saving the user stack can (and often does) fault.
2926 */
2927 if (unlikely(in_nmi()))
2928 return;
2929
2930 /*
2931 * prevent recursion, since the user stack tracing may
2932 * trigger other kernel events.
2933 */
2934 preempt_disable();
2935 if (__this_cpu_read(user_stack_count))
2936 goto out;
2937
2938 __this_cpu_inc(user_stack_count);
2939
2940 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2941 sizeof(*entry), flags, pc);
2942 if (!event)
2943 goto out_drop_count;
2944 entry = ring_buffer_event_data(event);
2945
2946 entry->tgid = current->tgid;
2947 memset(&entry->caller, 0, sizeof(entry->caller));
2948
2949 trace.nr_entries = 0;
2950 trace.max_entries = FTRACE_STACK_ENTRIES;
2951 trace.skip = 0;
2952 trace.entries = entry->caller;
2953
2954 save_stack_trace_user(&trace);
2955 if (!call_filter_check_discard(call, entry, buffer, event))
2956 __buffer_unlock_commit(buffer, event);
2957
2958 out_drop_count:
2959 __this_cpu_dec(user_stack_count);
2960 out:
2961 preempt_enable();
2962 }
2963
2964 #ifdef UNUSED
2965 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2966 {
2967 	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2968 }
2969 #endif /* UNUSED */
2970
2971 #endif /* CONFIG_STACKTRACE */
2972
2973 /* created for use with alloc_percpu */
2974 struct trace_buffer_struct {
2975 int nesting;
2976 char buffer[4][TRACE_BUF_SIZE];
2977 };
2978
2979 static struct trace_buffer_struct *trace_percpu_buffer;
2980
2981 /*
2982  * This allows for lockless recording. If we're nested too deeply, then
2983 * this returns NULL.
2984 */
2985 static char *get_trace_buf(void)
2986 {
2987 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2988
2989 if (!buffer || buffer->nesting >= 4)
2990 return NULL;
2991
2992 buffer->nesting++;
2993
2994 /* Interrupts must see nesting incremented before we use the buffer */
2995 barrier();
2996 	return &buffer->buffer[buffer->nesting - 1][0];
2997 }
2998
2999 static void put_trace_buf(void)
3000 {
3001 /* Don't let the decrement of nesting leak before this */
3002 barrier();
3003 this_cpu_dec(trace_percpu_buffer->nesting);
3004 }
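
/*
 * Sketch of the nesting idea: up to four nested contexts on one CPU
 * (e.g. task, softirq, hardirq, NMI) each get their own slot of the
 * per-cpu buffer and can format a trace_printk() message concurrently
 * without locks; a fifth level simply gets NULL and the message is
 * dropped.
 */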
3005
3006 static int alloc_percpu_trace_buffer(void)
3007 {
3008 struct trace_buffer_struct *buffers;
3009
3010 buffers = alloc_percpu(struct trace_buffer_struct);
3011 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3012 return -ENOMEM;
3013
3014 trace_percpu_buffer = buffers;
3015 return 0;
3016 }
3017
3018 static int buffers_allocated;
3019
3020 void trace_printk_init_buffers(void)
3021 {
3022 if (buffers_allocated)
3023 return;
3024
3025 if (alloc_percpu_trace_buffer())
3026 return;
3027
3028 /* trace_printk() is for debug use only. Don't use it in production. */
3029
3030 pr_warn("\n");
3031 pr_warn("**********************************************************\n");
3032 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3033 pr_warn("** **\n");
3034 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3035 pr_warn("** **\n");
3036 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3037 pr_warn("** unsafe for production use. **\n");
3038 pr_warn("** **\n");
3039 pr_warn("** If you see this message and you are not debugging **\n");
3040 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3041 pr_warn("** **\n");
3042 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3043 pr_warn("**********************************************************\n");
3044
3045 /* Expand the buffers to set size */
3046 tracing_update_buffers();
3047
3048 buffers_allocated = 1;
3049
3050 /*
3051 * trace_printk_init_buffers() can be called by modules.
3052 * If that happens, then we need to start cmdline recording
3053 * directly here. If the global_trace.buffer is already
3054 * allocated here, then this was called by module code.
3055 */
3056 if (global_trace.trace_buffer.buffer)
3057 tracing_start_cmdline_record();
3058 }
3059
3060 void trace_printk_start_comm(void)
3061 {
3062 /* Start tracing comms if trace printk is set */
3063 if (!buffers_allocated)
3064 return;
3065 tracing_start_cmdline_record();
3066 }
3067
3068 static void trace_printk_start_stop_comm(int enabled)
3069 {
3070 if (!buffers_allocated)
3071 return;
3072
3073 if (enabled)
3074 tracing_start_cmdline_record();
3075 else
3076 tracing_stop_cmdline_record();
3077 }
3078
3079 /**
3080  * trace_vbprintk - write binary msg to tracing buffer
3081  * @ip: caller address, @fmt: constant format string, @args: arguments for @fmt
3082  */
3083 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3084 {
3085 struct trace_event_call *call = &event_bprint;
3086 struct ring_buffer_event *event;
3087 struct ring_buffer *buffer;
3088 struct trace_array *tr = &global_trace;
3089 struct bprint_entry *entry;
3090 unsigned long flags;
3091 char *tbuffer;
3092 int len = 0, size, pc;
3093
3094 if (unlikely(tracing_selftest_running || tracing_disabled))
3095 return 0;
3096
3097 /* Don't pollute graph traces with trace_vprintk internals */
3098 pause_graph_tracing();
3099
3100 pc = preempt_count();
3101 preempt_disable_notrace();
3102
3103 tbuffer = get_trace_buf();
3104 if (!tbuffer) {
3105 len = 0;
3106 goto out_nobuffer;
3107 }
3108
3109 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3110
3111 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3112 goto out;
3113
3114 local_save_flags(flags);
3115 size = sizeof(*entry) + sizeof(u32) * len;
3116 buffer = tr->trace_buffer.buffer;
3117 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3118 flags, pc);
3119 if (!event)
3120 goto out;
3121 entry = ring_buffer_event_data(event);
3122 entry->ip = ip;
3123 entry->fmt = fmt;
3124
3125 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3126 if (!call_filter_check_discard(call, entry, buffer, event)) {
3127 __buffer_unlock_commit(buffer, event);
3128 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3129 }
3130
3131 out:
3132 put_trace_buf();
3133
3134 out_nobuffer:
3135 preempt_enable_notrace();
3136 unpause_graph_tracing();
3137
3138 return len;
3139 }
3140 EXPORT_SYMBOL_GPL(trace_vbprintk);
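
/*
 * Sketch of how this path is typically reached: trace_printk("x=%d\n", x)
 * with a constant format and arguments is routed here via
 * __trace_bprintk(), so only the binary argument values and a pointer to
 * the format string are stored; the text is formatted later when the
 * buffer is read.
 */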
3141
3142 __printf(3, 0)
3143 static int
3144 __trace_array_vprintk(struct ring_buffer *buffer,
3145 unsigned long ip, const char *fmt, va_list args)
3146 {
3147 struct trace_event_call *call = &event_print;
3148 struct ring_buffer_event *event;
3149 int len = 0, size, pc;
3150 struct print_entry *entry;
3151 unsigned long flags;
3152 char *tbuffer;
3153
3154 if (tracing_disabled || tracing_selftest_running)
3155 return 0;
3156
3157 /* Don't pollute graph traces with trace_vprintk internals */
3158 pause_graph_tracing();
3159
3160 pc = preempt_count();
3161 preempt_disable_notrace();
3162
3163
3164 tbuffer = get_trace_buf();
3165 if (!tbuffer) {
3166 len = 0;
3167 goto out_nobuffer;
3168 }
3169
3170 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3171
3172 local_save_flags(flags);
3173 size = sizeof(*entry) + len + 1;
3174 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3175 flags, pc);
3176 if (!event)
3177 goto out;
3178 entry = ring_buffer_event_data(event);
3179 entry->ip = ip;
3180
3181 memcpy(&entry->buf, tbuffer, len + 1);
3182 if (!call_filter_check_discard(call, entry, buffer, event)) {
3183 __buffer_unlock_commit(buffer, event);
3184 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3185 }
3186
3187 out:
3188 put_trace_buf();
3189
3190 out_nobuffer:
3191 preempt_enable_notrace();
3192 unpause_graph_tracing();
3193
3194 return len;
3195 }
3196
3197 __printf(3, 0)
3198 int trace_array_vprintk(struct trace_array *tr,
3199 unsigned long ip, const char *fmt, va_list args)
3200 {
3201 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3202 }
3203
3204 __printf(3, 0)
3205 int trace_array_printk(struct trace_array *tr,
3206 unsigned long ip, const char *fmt, ...)
3207 {
3208 int ret;
3209 va_list ap;
3210
3211 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3212 return 0;
3213
3214 va_start(ap, fmt);
3215 ret = trace_array_vprintk(tr, ip, fmt, ap);
3216 va_end(ap);
3217 return ret;
3218 }
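
/*
 * Illustrative use, assuming the caller already holds a trace_array
 * pointer for an instance (obtained when the instance was set up):
 *
 *	trace_array_printk(tr, _THIS_IP_, "stage %d done\n", stage);
 *
 * writes into that instance's buffer rather than the global one, but
 * only while the trace_printk option is set on the global trace
 * (checked above).
 */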
3219
3220 __printf(3, 4)
3221 int trace_array_printk_buf(struct ring_buffer *buffer,
3222 unsigned long ip, const char *fmt, ...)
3223 {
3224 int ret;
3225 va_list ap;
3226
3227 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3228 return 0;
3229
3230 va_start(ap, fmt);
3231 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3232 va_end(ap);
3233 return ret;
3234 }
3235
3236 __printf(2, 0)
3237 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3238 {
3239 return trace_array_vprintk(&global_trace, ip, fmt, args);
3240 }
3241 EXPORT_SYMBOL_GPL(trace_vprintk);
3242
3243 static void trace_iterator_increment(struct trace_iterator *iter)
3244 {
3245 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3246
3247 iter->idx++;
3248 if (buf_iter)
3249 ring_buffer_read(buf_iter, NULL);
3250 }
3251
3252 static struct trace_entry *
3253 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3254 unsigned long *lost_events)
3255 {
3256 struct ring_buffer_event *event;
3257 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3258
3259 if (buf_iter)
3260 event = ring_buffer_iter_peek(buf_iter, ts);
3261 else
3262 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3263 lost_events);
3264
3265 if (event) {
3266 iter->ent_size = ring_buffer_event_length(event);
3267 return ring_buffer_event_data(event);
3268 }
3269 iter->ent_size = 0;
3270 return NULL;
3271 }
3272
3273 static struct trace_entry *
3274 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3275 unsigned long *missing_events, u64 *ent_ts)
3276 {
3277 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3278 struct trace_entry *ent, *next = NULL;
3279 unsigned long lost_events = 0, next_lost = 0;
3280 int cpu_file = iter->cpu_file;
3281 u64 next_ts = 0, ts;
3282 int next_cpu = -1;
3283 int next_size = 0;
3284 int cpu;
3285
3286 /*
3287 	 * If we are in a per_cpu trace file, don't bother iterating over
3288 	 * all CPUs; just peek at that one directly.
3289 */
3290 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3291 if (ring_buffer_empty_cpu(buffer, cpu_file))
3292 return NULL;
3293 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3294 if (ent_cpu)
3295 *ent_cpu = cpu_file;
3296
3297 return ent;
3298 }
3299
3300 for_each_tracing_cpu(cpu) {
3301
3302 if (ring_buffer_empty_cpu(buffer, cpu))
3303 continue;
3304
3305 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3306
3307 /*
3308 * Pick the entry with the smallest timestamp:
3309 */
3310 if (ent && (!next || ts < next_ts)) {
3311 next = ent;
3312 next_cpu = cpu;
3313 next_ts = ts;
3314 next_lost = lost_events;
3315 next_size = iter->ent_size;
3316 }
3317 }
3318
3319 iter->ent_size = next_size;
3320
3321 if (ent_cpu)
3322 *ent_cpu = next_cpu;
3323
3324 if (ent_ts)
3325 *ent_ts = next_ts;
3326
3327 if (missing_events)
3328 *missing_events = next_lost;
3329
3330 return next;
3331 }
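
/*
 * Worked example: if the pending entries on three CPUs carry timestamps
 * 1000, 400 and 700, the scan above returns the CPU whose entry has
 * ts == 400 (along with its lost_events and ent_size), so repeated
 * calls merge the per-CPU buffers into a single stream ordered by
 * timestamp.
 */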
3332
3333 /* Find the next real entry, without updating the iterator itself */
3334 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3335 int *ent_cpu, u64 *ent_ts)
3336 {
3337 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3338 }
3339
3340 /* Find the next real entry, and increment the iterator to the next entry */
3341 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3342 {
3343 iter->ent = __find_next_entry(iter, &iter->cpu,
3344 &iter->lost_events, &iter->ts);
3345
3346 if (iter->ent)
3347 trace_iterator_increment(iter);
3348
3349 return iter->ent ? iter : NULL;
3350 }
3351
3352 static void trace_consume(struct trace_iterator *iter)
3353 {
3354 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3355 &iter->lost_events);
3356 }
3357
3358 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3359 {
3360 struct trace_iterator *iter = m->private;
3361 int i = (int)*pos;
3362 void *ent;
3363
3364 WARN_ON_ONCE(iter->leftover);
3365
3366 (*pos)++;
3367
3368 /* can't go backwards */
3369 if (iter->idx > i)
3370 return NULL;
3371
3372 if (iter->idx < 0)
3373 ent = trace_find_next_entry_inc(iter);
3374 else
3375 ent = iter;
3376
3377 while (ent && iter->idx < i)
3378 ent = trace_find_next_entry_inc(iter);
3379
3380 iter->pos = *pos;
3381
3382 return ent;
3383 }
3384
3385 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3386 {
3387 struct ring_buffer_event *event;
3388 struct ring_buffer_iter *buf_iter;
3389 unsigned long entries = 0;
3390 u64 ts;
3391
3392 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3393
3394 buf_iter = trace_buffer_iter(iter, cpu);
3395 if (!buf_iter)
3396 return;
3397
3398 ring_buffer_iter_reset(buf_iter);
3399
3400 /*
3401 * We could have the case with the max latency tracers
3402 * that a reset never took place on a cpu. This is evident
3403 * by the timestamp being before the start of the buffer.
3404 */
3405 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3406 if (ts >= iter->trace_buffer->time_start)
3407 break;
3408 entries++;
3409 ring_buffer_read(buf_iter, NULL);
3410 }
3411
3412 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3413 }
3414
3415 /*
3416  * The current tracer is copied to avoid holding a global lock
3417  * all around.
3418 */
3419 static void *s_start(struct seq_file *m, loff_t *pos)
3420 {
3421 struct trace_iterator *iter = m->private;
3422 struct trace_array *tr = iter->tr;
3423 int cpu_file = iter->cpu_file;
3424 void *p = NULL;
3425 loff_t l = 0;
3426 int cpu;
3427
3428 /*
3429 	 * copy the tracer to avoid using a global lock all around.
3430 	 * iter->trace is a copy of current_trace; the name pointer can
3431 	 * be compared instead of using strcmp(), as iter->trace->name
3432 	 * will point to the same string as current_trace->name.
3433 */
3434 mutex_lock(&trace_types_lock);
3435 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3436 *iter->trace = *tr->current_trace;
3437 mutex_unlock(&trace_types_lock);
3438
3439 #ifdef CONFIG_TRACER_MAX_TRACE
3440 if (iter->snapshot && iter->trace->use_max_tr)
3441 return ERR_PTR(-EBUSY);
3442 #endif
3443
3444 if (!iter->snapshot)
3445 atomic_inc(&trace_record_taskinfo_disabled);
3446
3447 if (*pos != iter->pos) {
3448 iter->ent = NULL;
3449 iter->cpu = 0;
3450 iter->idx = -1;
3451
3452 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3453 for_each_tracing_cpu(cpu)
3454 tracing_iter_reset(iter, cpu);
3455 } else
3456 tracing_iter_reset(iter, cpu_file);
3457
3458 iter->leftover = 0;
3459 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3460 ;
3461
3462 } else {
3463 /*
3464 * If we overflowed the seq_file before, then we want
3465 * to just reuse the trace_seq buffer again.
3466 */
3467 if (iter->leftover)
3468 p = iter;
3469 else {
3470 l = *pos - 1;
3471 p = s_next(m, p, &l);
3472 }
3473 }
3474
3475 trace_event_read_lock();
3476 trace_access_lock(cpu_file);
3477 return p;
3478 }
3479
3480 static void s_stop(struct seq_file *m, void *p)
3481 {
3482 struct trace_iterator *iter = m->private;
3483
3484 #ifdef CONFIG_TRACER_MAX_TRACE
3485 if (iter->snapshot && iter->trace->use_max_tr)
3486 return;
3487 #endif
3488
3489 if (!iter->snapshot)
3490 atomic_dec(&trace_record_taskinfo_disabled);
3491
3492 trace_access_unlock(iter->cpu_file);
3493 trace_event_read_unlock();
3494 }
3495
3496 static void
3497 get_total_entries(struct trace_buffer *buf,
3498 unsigned long *total, unsigned long *entries)
3499 {
3500 unsigned long count;
3501 int cpu;
3502
3503 *total = 0;
3504 *entries = 0;
3505
3506 for_each_tracing_cpu(cpu) {
3507 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3508 /*
3509 * If this buffer has skipped entries, then we hold all
3510 * entries for the trace and we need to ignore the
3511 * ones before the time stamp.
3512 */
3513 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3514 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3515 /* total is the same as the entries */
3516 *total += count;
3517 } else
3518 *total += count +
3519 ring_buffer_overrun_cpu(buf->buffer, cpu);
3520 *entries += count;
3521 }
3522 }
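
/*
 * Worked example with hypothetical numbers: a CPU buffer holding 50
 * readable entries with an overrun count of 20 contributes 50 to
 * *entries and 70 to *total, while a CPU whose entries were skipped at
 * iterator-reset time contributes its post-skip count to both, since
 * the skipped entries predate the trace window.
 */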
3523
3524 static void print_lat_help_header(struct seq_file *m)
3525 {
3526 seq_puts(m, "# _------=> CPU# \n"
3527 "# / _-----=> irqs-off \n"
3528 "# | / _----=> need-resched \n"
3529 "# || / _---=> hardirq/softirq \n"
3530 "# ||| / _--=> preempt-depth \n"
3531 "# |||| / delay \n"
3532 "# cmd pid ||||| time | caller \n"
3533 "# \\ / ||||| \\ | / \n");
3534 }
3535
3536 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3537 {
3538 unsigned long total;
3539 unsigned long entries;
3540
3541 get_total_entries(buf, &total, &entries);
3542 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3543 entries, total, num_online_cpus());
3544 seq_puts(m, "#\n");
3545 }
3546
3547 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3548 unsigned int flags)
3549 {
3550 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551
3552 print_event_info(buf, m);
3553
3554 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3555 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3556 }
3557
3558 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3559 unsigned int flags)
3560 {
3561 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3562 const char tgid_space[] = " ";
3563 const char space[] = " ";
3564
3565 print_event_info(buf, m);
3566
3567 seq_printf(m, "# %s _-----=> irqs-off\n",
3568 tgid ? tgid_space : space);
3569 seq_printf(m, "# %s / _----=> need-resched\n",
3570 tgid ? tgid_space : space);
3571 seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
3572 tgid ? tgid_space : space);
3573 seq_printf(m, "# %s|| / _--=> preempt-depth\n",
3574 tgid ? tgid_space : space);
3575 seq_printf(m, "# %s||| / delay\n",
3576 tgid ? tgid_space : space);
3577 seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
3578 tgid ? " TGID " : space);
3579 seq_printf(m, "# | | %s | |||| | |\n",
3580 tgid ? " | " : space);
3581 }
3582
3583 void
3584 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3585 {
3586 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3587 struct trace_buffer *buf = iter->trace_buffer;
3588 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3589 struct tracer *type = iter->trace;
3590 unsigned long entries;
3591 unsigned long total;
3592 	const char *name = type->name;
3595
3596 get_total_entries(buf, &total, &entries);
3597
3598 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3599 name, UTS_RELEASE);
3600 seq_puts(m, "# -----------------------------------"
3601 "---------------------------------\n");
3602 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3603 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3604 nsecs_to_usecs(data->saved_latency),
3605 entries,
3606 total,
3607 buf->cpu,
3608 #if defined(CONFIG_PREEMPT_NONE)
3609 "server",
3610 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3611 "desktop",
3612 #elif defined(CONFIG_PREEMPT)
3613 "preempt",
3614 #else
3615 "unknown",
3616 #endif
3617 /* These are reserved for later use */
3618 0, 0, 0, 0);
3619 #ifdef CONFIG_SMP
3620 seq_printf(m, " #P:%d)\n", num_online_cpus());
3621 #else
3622 seq_puts(m, ")\n");
3623 #endif
3624 seq_puts(m, "# -----------------\n");
3625 seq_printf(m, "# | task: %.16s-%d "
3626 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3627 data->comm, data->pid,
3628 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3629 data->policy, data->rt_priority);
3630 seq_puts(m, "# -----------------\n");
3631
3632 if (data->critical_start) {
3633 seq_puts(m, "# => started at: ");
3634 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3635 trace_print_seq(m, &iter->seq);
3636 seq_puts(m, "\n# => ended at: ");
3637 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3638 trace_print_seq(m, &iter->seq);
3639 seq_puts(m, "\n#\n");
3640 }
3641
3642 seq_puts(m, "#\n");
3643 }
3644
3645 static void test_cpu_buff_start(struct trace_iterator *iter)
3646 {
3647 struct trace_seq *s = &iter->seq;
3648 struct trace_array *tr = iter->tr;
3649
3650 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3651 return;
3652
3653 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3654 return;
3655
3656 if (cpumask_available(iter->started) &&
3657 cpumask_test_cpu(iter->cpu, iter->started))
3658 return;
3659
3660 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3661 return;
3662
3663 if (cpumask_available(iter->started))
3664 cpumask_set_cpu(iter->cpu, iter->started);
3665
3666 /* Don't print started cpu buffer for the first entry of the trace */
3667 if (iter->idx > 1)
3668 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3669 iter->cpu);
3670 }
3671
3672 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3673 {
3674 struct trace_array *tr = iter->tr;
3675 struct trace_seq *s = &iter->seq;
3676 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3677 struct trace_entry *entry;
3678 struct trace_event *event;
3679
3680 entry = iter->ent;
3681
3682 test_cpu_buff_start(iter);
3683
3684 event = ftrace_find_event(entry->type);
3685
3686 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3687 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3688 trace_print_lat_context(iter);
3689 else
3690 trace_print_context(iter);
3691 }
3692
3693 if (trace_seq_has_overflowed(s))
3694 return TRACE_TYPE_PARTIAL_LINE;
3695
3696 if (event)
3697 return event->funcs->trace(iter, sym_flags, event);
3698
3699 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3700
3701 return trace_handle_return(s);
3702 }
3703
3704 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3705 {
3706 struct trace_array *tr = iter->tr;
3707 struct trace_seq *s = &iter->seq;
3708 struct trace_entry *entry;
3709 struct trace_event *event;
3710
3711 entry = iter->ent;
3712
3713 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3714 trace_seq_printf(s, "%d %d %llu ",
3715 entry->pid, iter->cpu, iter->ts);
3716
3717 if (trace_seq_has_overflowed(s))
3718 return TRACE_TYPE_PARTIAL_LINE;
3719
3720 event = ftrace_find_event(entry->type);
3721 if (event)
3722 return event->funcs->raw(iter, 0, event);
3723
3724 trace_seq_printf(s, "%d ?\n", entry->type);
3725
3726 return trace_handle_return(s);
3727 }
3728
3729 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3730 {
3731 struct trace_array *tr = iter->tr;
3732 struct trace_seq *s = &iter->seq;
3733 unsigned char newline = '\n';
3734 struct trace_entry *entry;
3735 struct trace_event *event;
3736
3737 entry = iter->ent;
3738
3739 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3740 SEQ_PUT_HEX_FIELD(s, entry->pid);
3741 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3742 SEQ_PUT_HEX_FIELD(s, iter->ts);
3743 if (trace_seq_has_overflowed(s))
3744 return TRACE_TYPE_PARTIAL_LINE;
3745 }
3746
3747 event = ftrace_find_event(entry->type);
3748 if (event) {
3749 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3750 if (ret != TRACE_TYPE_HANDLED)
3751 return ret;
3752 }
3753
3754 SEQ_PUT_FIELD(s, newline);
3755
3756 return trace_handle_return(s);
3757 }
3758
3759 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3760 {
3761 struct trace_array *tr = iter->tr;
3762 struct trace_seq *s = &iter->seq;
3763 struct trace_entry *entry;
3764 struct trace_event *event;
3765
3766 entry = iter->ent;
3767
3768 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3769 SEQ_PUT_FIELD(s, entry->pid);
3770 SEQ_PUT_FIELD(s, iter->cpu);
3771 SEQ_PUT_FIELD(s, iter->ts);
3772 if (trace_seq_has_overflowed(s))
3773 return TRACE_TYPE_PARTIAL_LINE;
3774 }
3775
3776 event = ftrace_find_event(entry->type);
3777 return event ? event->funcs->binary(iter, 0, event) :
3778 TRACE_TYPE_HANDLED;
3779 }
3780
3781 int trace_empty(struct trace_iterator *iter)
3782 {
3783 struct ring_buffer_iter *buf_iter;
3784 int cpu;
3785
3786 /* If we are looking at one CPU buffer, only check that one */
3787 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3788 cpu = iter->cpu_file;
3789 buf_iter = trace_buffer_iter(iter, cpu);
3790 if (buf_iter) {
3791 if (!ring_buffer_iter_empty(buf_iter))
3792 return 0;
3793 } else {
3794 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3795 return 0;
3796 }
3797 return 1;
3798 }
3799
3800 for_each_tracing_cpu(cpu) {
3801 buf_iter = trace_buffer_iter(iter, cpu);
3802 if (buf_iter) {
3803 if (!ring_buffer_iter_empty(buf_iter))
3804 return 0;
3805 } else {
3806 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3807 return 0;
3808 }
3809 }
3810
3811 return 1;
3812 }
3813
3814 /* Called with trace_event_read_lock() held. */
3815 enum print_line_t print_trace_line(struct trace_iterator *iter)
3816 {
3817 struct trace_array *tr = iter->tr;
3818 unsigned long trace_flags = tr->trace_flags;
3819 enum print_line_t ret;
3820
3821 if (iter->lost_events) {
3822 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3823 iter->cpu, iter->lost_events);
3824 if (trace_seq_has_overflowed(&iter->seq))
3825 return TRACE_TYPE_PARTIAL_LINE;
3826 }
3827
3828 if (iter->trace && iter->trace->print_line) {
3829 ret = iter->trace->print_line(iter);
3830 if (ret != TRACE_TYPE_UNHANDLED)
3831 return ret;
3832 }
3833
3834 if (iter->ent->type == TRACE_BPUTS &&
3835 trace_flags & TRACE_ITER_PRINTK &&
3836 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3837 return trace_print_bputs_msg_only(iter);
3838
3839 if (iter->ent->type == TRACE_BPRINT &&
3840 trace_flags & TRACE_ITER_PRINTK &&
3841 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3842 return trace_print_bprintk_msg_only(iter);
3843
3844 if (iter->ent->type == TRACE_PRINT &&
3845 trace_flags & TRACE_ITER_PRINTK &&
3846 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3847 return trace_print_printk_msg_only(iter);
3848
3849 if (trace_flags & TRACE_ITER_BIN)
3850 return print_bin_fmt(iter);
3851
3852 if (trace_flags & TRACE_ITER_HEX)
3853 return print_hex_fmt(iter);
3854
3855 if (trace_flags & TRACE_ITER_RAW)
3856 return print_raw_fmt(iter);
3857
3858 return print_trace_fmt(iter);
3859 }
3860
3861 void trace_latency_header(struct seq_file *m)
3862 {
3863 struct trace_iterator *iter = m->private;
3864 struct trace_array *tr = iter->tr;
3865
3866 /* print nothing if the buffers are empty */
3867 if (trace_empty(iter))
3868 return;
3869
3870 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3871 print_trace_header(m, iter);
3872
3873 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3874 print_lat_help_header(m);
3875 }
3876
3877 void trace_default_header(struct seq_file *m)
3878 {
3879 struct trace_iterator *iter = m->private;
3880 struct trace_array *tr = iter->tr;
3881 unsigned long trace_flags = tr->trace_flags;
3882
3883 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3884 return;
3885
3886 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3887 /* print nothing if the buffers are empty */
3888 if (trace_empty(iter))
3889 return;
3890 print_trace_header(m, iter);
3891 if (!(trace_flags & TRACE_ITER_VERBOSE))
3892 print_lat_help_header(m);
3893 } else {
3894 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3895 if (trace_flags & TRACE_ITER_IRQ_INFO)
3896 print_func_help_header_irq(iter->trace_buffer,
3897 m, trace_flags);
3898 else
3899 print_func_help_header(iter->trace_buffer, m,
3900 trace_flags);
3901 }
3902 }
3903 }
3904
3905 static void test_ftrace_alive(struct seq_file *m)
3906 {
3907 if (!ftrace_is_dead())
3908 return;
3909 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3910 "# MAY BE MISSING FUNCTION EVENTS\n");
3911 }
3912
3913 #ifdef CONFIG_TRACER_MAX_TRACE
3914 static void show_snapshot_main_help(struct seq_file *m)
3915 {
3916 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3917 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3918 "# Takes a snapshot of the main buffer.\n"
3919 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3920 		    "#                      (Doesn't have to be '2', works with any number that\n"
3921 "# is not a '0' or '1')\n");
3922 }
3923
3924 static void show_snapshot_percpu_help(struct seq_file *m)
3925 {
3926 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3927 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3928 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3929 "# Takes a snapshot of the main buffer for this cpu.\n");
3930 #else
3931 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3932 "# Must use main snapshot file to allocate.\n");
3933 #endif
3934 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3935 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2', works with any number that\n"
3936 "# is not a '0' or '1')\n");
3937 }
3938
3939 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3940 {
3941 if (iter->tr->allocated_snapshot)
3942 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3943 else
3944 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3945
3946 seq_puts(m, "# Snapshot commands:\n");
3947 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3948 show_snapshot_main_help(m);
3949 else
3950 show_snapshot_percpu_help(m);
3951 }
3952 #else
3953 /* Should never be called */
3954 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3955 #endif
3956
3957 static int s_show(struct seq_file *m, void *v)
3958 {
3959 struct trace_iterator *iter = v;
3960 int ret;
3961
3962 if (iter->ent == NULL) {
3963 if (iter->tr) {
3964 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3965 seq_puts(m, "#\n");
3966 test_ftrace_alive(m);
3967 }
3968 if (iter->snapshot && trace_empty(iter))
3969 print_snapshot_help(m, iter);
3970 else if (iter->trace && iter->trace->print_header)
3971 iter->trace->print_header(m);
3972 else
3973 trace_default_header(m);
3974
3975 } else if (iter->leftover) {
3976 /*
3977 * If we filled the seq_file buffer earlier, we
3978 * want to just show it now.
3979 */
3980 ret = trace_print_seq(m, &iter->seq);
3981
3982 /* ret should this time be zero, but you never know */
3983 iter->leftover = ret;
3984
3985 } else {
3986 print_trace_line(iter);
3987 ret = trace_print_seq(m, &iter->seq);
3988 /*
3989 * If we overflow the seq_file buffer, then it will
3990 * ask us for this data again at start up.
3991 * Use that instead.
3992 * ret is 0 if seq_file write succeeded.
3993 * -1 otherwise.
3994 */
3995 iter->leftover = ret;
3996 }
3997
3998 return 0;
3999 }
4000
4001 /*
4002 * Should be used after trace_array_get(), trace_types_lock
4003 * ensures that i_cdev was already initialized.
4004 */
4005 static inline int tracing_get_cpu(struct inode *inode)
4006 {
4007 if (inode->i_cdev) /* See trace_create_cpu_file() */
4008 return (long)inode->i_cdev - 1;
4009 return RING_BUFFER_ALL_CPUS;
4010 }
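/*
 * Illustrative sketch of the i_cdev encoding decoded above (the store side
 * lives in trace_create_cpu_file() elsewhere in this file and is assumed,
 * not shown here):
 *
 *   store (per-cpu file creation):  inode->i_cdev = (void *)(long)(cpu + 1);
 *   load  (tracing_get_cpu above):  cpu = (long)inode->i_cdev - 1;
 *
 * so a NULL i_cdev decodes to RING_BUFFER_ALL_CPUS.
 */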
4011
4012 static const struct seq_operations tracer_seq_ops = {
4013 .start = s_start,
4014 .next = s_next,
4015 .stop = s_stop,
4016 .show = s_show,
4017 };
4018
4019 static struct trace_iterator *
4020 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4021 {
4022 struct trace_array *tr = inode->i_private;
4023 struct trace_iterator *iter;
4024 int cpu;
4025
4026 if (tracing_disabled)
4027 return ERR_PTR(-ENODEV);
4028
4029 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4030 if (!iter)
4031 return ERR_PTR(-ENOMEM);
4032
4033 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4034 GFP_KERNEL);
4035 if (!iter->buffer_iter)
4036 goto release;
4037
4038 /*
4039 * We make a copy of the current tracer to avoid concurrent
4040 * changes on it while we are reading.
4041 */
4042 mutex_lock(&trace_types_lock);
4043 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4044 if (!iter->trace)
4045 goto fail;
4046
4047 *iter->trace = *tr->current_trace;
4048
4049 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4050 goto fail;
4051
4052 iter->tr = tr;
4053
4054 #ifdef CONFIG_TRACER_MAX_TRACE
4055 /* Currently only the top directory has a snapshot */
4056 if (tr->current_trace->print_max || snapshot)
4057 iter->trace_buffer = &tr->max_buffer;
4058 else
4059 #endif
4060 iter->trace_buffer = &tr->trace_buffer;
4061 iter->snapshot = snapshot;
4062 iter->pos = -1;
4063 iter->cpu_file = tracing_get_cpu(inode);
4064 mutex_init(&iter->mutex);
4065
4066 /* Notify the tracer early, before we stop tracing. */
4067 if (iter->trace && iter->trace->open)
4068 iter->trace->open(iter);
4069
4070 /* Annotate start of buffers if we had overruns */
4071 if (ring_buffer_overruns(iter->trace_buffer->buffer))
4072 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4073
4074 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4075 if (trace_clocks[tr->clock_id].in_ns)
4076 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4077
4078 /* stop the trace while dumping if we are not opening "snapshot" */
4079 if (!iter->snapshot)
4080 tracing_stop_tr(tr);
4081
4082 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4083 for_each_tracing_cpu(cpu) {
4084 iter->buffer_iter[cpu] =
4085 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4086 cpu, GFP_KERNEL);
4087 }
4088 ring_buffer_read_prepare_sync();
4089 for_each_tracing_cpu(cpu) {
4090 ring_buffer_read_start(iter->buffer_iter[cpu]);
4091 tracing_iter_reset(iter, cpu);
4092 }
4093 } else {
4094 cpu = iter->cpu_file;
4095 iter->buffer_iter[cpu] =
4096 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4097 cpu, GFP_KERNEL);
4098 ring_buffer_read_prepare_sync();
4099 ring_buffer_read_start(iter->buffer_iter[cpu]);
4100 tracing_iter_reset(iter, cpu);
4101 }
4102
4103 mutex_unlock(&trace_types_lock);
4104
4105 return iter;
4106
4107 fail:
4108 mutex_unlock(&trace_types_lock);
4109 kfree(iter->trace);
4110 kfree(iter->buffer_iter);
4111 release:
4112 seq_release_private(inode, file);
4113 return ERR_PTR(-ENOMEM);
4114 }
4115
4116 int tracing_open_generic(struct inode *inode, struct file *filp)
4117 {
4118 if (tracing_disabled)
4119 return -ENODEV;
4120
4121 filp->private_data = inode->i_private;
4122 return 0;
4123 }
4124
4125 bool tracing_is_disabled(void)
4126 {
4127 return (tracing_disabled) ? true : false;
4128 }
4129
4130 /*
4131 * Open and update trace_array ref count.
4132 * Must have the current trace_array passed to it.
4133 */
4134 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4135 {
4136 struct trace_array *tr = inode->i_private;
4137
4138 if (tracing_disabled)
4139 return -ENODEV;
4140
4141 if (trace_array_get(tr) < 0)
4142 return -ENODEV;
4143
4144 filp->private_data = inode->i_private;
4145
4146 return 0;
4147 }
4148
4149 static int tracing_release(struct inode *inode, struct file *file)
4150 {
4151 struct trace_array *tr = inode->i_private;
4152 struct seq_file *m = file->private_data;
4153 struct trace_iterator *iter;
4154 int cpu;
4155
4156 if (!(file->f_mode & FMODE_READ)) {
4157 trace_array_put(tr);
4158 return 0;
4159 }
4160
4161 /* Writes do not use seq_file */
4162 iter = m->private;
4163 mutex_lock(&trace_types_lock);
4164
4165 for_each_tracing_cpu(cpu) {
4166 if (iter->buffer_iter[cpu])
4167 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4168 }
4169
4170 if (iter->trace && iter->trace->close)
4171 iter->trace->close(iter);
4172
4173 if (!iter->snapshot)
4174 /* reenable tracing if it was previously enabled */
4175 tracing_start_tr(tr);
4176
4177 __trace_array_put(tr);
4178
4179 mutex_unlock(&trace_types_lock);
4180
4181 mutex_destroy(&iter->mutex);
4182 free_cpumask_var(iter->started);
4183 kfree(iter->trace);
4184 kfree(iter->buffer_iter);
4185 seq_release_private(inode, file);
4186
4187 return 0;
4188 }
4189
4190 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4191 {
4192 struct trace_array *tr = inode->i_private;
4193
4194 trace_array_put(tr);
4195 return 0;
4196 }
4197
4198 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4199 {
4200 struct trace_array *tr = inode->i_private;
4201
4202 trace_array_put(tr);
4203
4204 return single_release(inode, file);
4205 }
4206
4207 static int tracing_open(struct inode *inode, struct file *file)
4208 {
4209 struct trace_array *tr = inode->i_private;
4210 struct trace_iterator *iter;
4211 int ret = 0;
4212
4213 if (trace_array_get(tr) < 0)
4214 return -ENODEV;
4215
4216 /* If this file was open for write, then erase contents */
4217 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4218 int cpu = tracing_get_cpu(inode);
4219 struct trace_buffer *trace_buf = &tr->trace_buffer;
4220
4221 #ifdef CONFIG_TRACER_MAX_TRACE
4222 if (tr->current_trace->print_max)
4223 trace_buf = &tr->max_buffer;
4224 #endif
4225
4226 if (cpu == RING_BUFFER_ALL_CPUS)
4227 tracing_reset_online_cpus(trace_buf);
4228 else
4229 tracing_reset(trace_buf, cpu);
4230 }
4231
4232 if (file->f_mode & FMODE_READ) {
4233 iter = __tracing_open(inode, file, false);
4234 if (IS_ERR(iter))
4235 ret = PTR_ERR(iter);
4236 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4237 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4238 }
4239
4240 if (ret < 0)
4241 trace_array_put(tr);
4242
4243 return ret;
4244 }
4245
4246 /*
4247 * Some tracers are not suitable for instance buffers.
4248 * A tracer is always available for the global array (toplevel)
4249 * or if it explicitly states that it is.
4250 */
4251 static bool
4252 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4253 {
4254 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4255 }
4256
4257 /* Find the next tracer that this trace array may use */
4258 static struct tracer *
4259 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4260 {
4261 while (t && !trace_ok_for_array(t, tr))
4262 t = t->next;
4263
4264 return t;
4265 }
4266
4267 static void *
4268 t_next(struct seq_file *m, void *v, loff_t *pos)
4269 {
4270 struct trace_array *tr = m->private;
4271 struct tracer *t = v;
4272
4273 (*pos)++;
4274
4275 if (t)
4276 t = get_tracer_for_array(tr, t->next);
4277
4278 return t;
4279 }
4280
4281 static void *t_start(struct seq_file *m, loff_t *pos)
4282 {
4283 struct trace_array *tr = m->private;
4284 struct tracer *t;
4285 loff_t l = 0;
4286
4287 mutex_lock(&trace_types_lock);
4288
4289 t = get_tracer_for_array(tr, trace_types);
4290 for (; t && l < *pos; t = t_next(m, t, &l))
4291 ;
4292
4293 return t;
4294 }
4295
4296 static void t_stop(struct seq_file *m, void *p)
4297 {
4298 mutex_unlock(&trace_types_lock);
4299 }
4300
4301 static int t_show(struct seq_file *m, void *v)
4302 {
4303 struct tracer *t = v;
4304
4305 if (!t)
4306 return 0;
4307
4308 seq_puts(m, t->name);
4309 if (t->next)
4310 seq_putc(m, ' ');
4311 else
4312 seq_putc(m, '\n');
4313
4314 return 0;
4315 }
4316
4317 static const struct seq_operations show_traces_seq_ops = {
4318 .start = t_start,
4319 .next = t_next,
4320 .stop = t_stop,
4321 .show = t_show,
4322 };
4323
4324 static int show_traces_open(struct inode *inode, struct file *file)
4325 {
4326 struct trace_array *tr = inode->i_private;
4327 struct seq_file *m;
4328 int ret;
4329
4330 if (tracing_disabled)
4331 return -ENODEV;
4332
4333 ret = seq_open(file, &show_traces_seq_ops);
4334 if (ret)
4335 return ret;
4336
4337 m = file->private_data;
4338 m->private = tr;
4339
4340 return 0;
4341 }
4342
4343 static ssize_t
4344 tracing_write_stub(struct file *filp, const char __user *ubuf,
4345 size_t count, loff_t *ppos)
4346 {
4347 return count;
4348 }
4349
4350 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4351 {
4352 int ret;
4353
4354 if (file->f_mode & FMODE_READ)
4355 ret = seq_lseek(file, offset, whence);
4356 else
4357 file->f_pos = ret = 0;
4358
4359 return ret;
4360 }
4361
4362 static const struct file_operations tracing_fops = {
4363 .open = tracing_open,
4364 .read = seq_read,
4365 .write = tracing_write_stub,
4366 .llseek = tracing_lseek,
4367 .release = tracing_release,
4368 };
4369
4370 static const struct file_operations show_traces_fops = {
4371 .open = show_traces_open,
4372 .read = seq_read,
4373 .release = seq_release,
4374 .llseek = seq_lseek,
4375 };
4376
4377 static ssize_t
4378 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4379 size_t count, loff_t *ppos)
4380 {
4381 struct trace_array *tr = file_inode(filp)->i_private;
4382 char *mask_str;
4383 int len;
4384
4385 len = snprintf(NULL, 0, "%*pb\n",
4386 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4387 mask_str = kmalloc(len, GFP_KERNEL);
4388 if (!mask_str)
4389 return -ENOMEM;
4390
4391 len = snprintf(mask_str, len, "%*pb\n",
4392 cpumask_pr_args(tr->tracing_cpumask));
4393 if (len >= count) {
4394 count = -EINVAL;
4395 goto out_err;
4396 }
4397 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4398
4399 out_err:
4400 kfree(mask_str);
4401
4402 return count;
4403 }
4404
4405 static ssize_t
4406 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4407 size_t count, loff_t *ppos)
4408 {
4409 struct trace_array *tr = file_inode(filp)->i_private;
4410 cpumask_var_t tracing_cpumask_new;
4411 int err, cpu;
4412
4413 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4414 return -ENOMEM;
4415
4416 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4417 if (err)
4418 goto err_unlock;
4419
4420 local_irq_disable();
4421 arch_spin_lock(&tr->max_lock);
4422 for_each_tracing_cpu(cpu) {
4423 /*
4424 * Increase/decrease the disabled counter if we are
4425 * about to flip a bit in the cpumask:
4426 */
4427 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4428 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4429 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4430 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4431 }
4432 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4433 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4434 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4435 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4436 }
4437 }
4438 arch_spin_unlock(&tr->max_lock);
4439 local_irq_enable();
4440
4441 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4442 free_cpumask_var(tracing_cpumask_new);
4443
4444 return count;
4445
4446 err_unlock:
4447 free_cpumask_var(tracing_cpumask_new);
4448
4449 return err;
4450 }
4451
4452 static const struct file_operations tracing_cpumask_fops = {
4453 .open = tracing_open_generic_tr,
4454 .read = tracing_cpumask_read,
4455 .write = tracing_cpumask_write,
4456 .release = tracing_release_generic_tr,
4457 .llseek = generic_file_llseek,
4458 };
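/*
 * Illustrative sketch (not taken from this file): tracing_cpumask is read
 * and written as a hex CPU mask (cpumask_parse_user() above), so limiting
 * tracing to CPUs 0-1 on, say, a 4-CPU machine could look like:
 *
 *   # cat /sys/kernel/tracing/tracing_cpumask      # -> f
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *   # cat /sys/kernel/tracing/tracing_cpumask      # -> 3
 *
 * (paths assume tracefs mounted at /sys/kernel/tracing)
 */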
4459
4460 static int tracing_trace_options_show(struct seq_file *m, void *v)
4461 {
4462 struct tracer_opt *trace_opts;
4463 struct trace_array *tr = m->private;
4464 u32 tracer_flags;
4465 int i;
4466
4467 mutex_lock(&trace_types_lock);
4468 tracer_flags = tr->current_trace->flags->val;
4469 trace_opts = tr->current_trace->flags->opts;
4470
4471 for (i = 0; trace_options[i]; i++) {
4472 if (tr->trace_flags & (1 << i))
4473 seq_printf(m, "%s\n", trace_options[i]);
4474 else
4475 seq_printf(m, "no%s\n", trace_options[i]);
4476 }
4477
4478 for (i = 0; trace_opts[i].name; i++) {
4479 if (tracer_flags & trace_opts[i].bit)
4480 seq_printf(m, "%s\n", trace_opts[i].name);
4481 else
4482 seq_printf(m, "no%s\n", trace_opts[i].name);
4483 }
4484 mutex_unlock(&trace_types_lock);
4485
4486 return 0;
4487 }
4488
4489 static int __set_tracer_option(struct trace_array *tr,
4490 struct tracer_flags *tracer_flags,
4491 struct tracer_opt *opts, int neg)
4492 {
4493 struct tracer *trace = tracer_flags->trace;
4494 int ret;
4495
4496 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4497 if (ret)
4498 return ret;
4499
4500 if (neg)
4501 tracer_flags->val &= ~opts->bit;
4502 else
4503 tracer_flags->val |= opts->bit;
4504 return 0;
4505 }
4506
4507 /* Try to assign a tracer specific option */
4508 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4509 {
4510 struct tracer *trace = tr->current_trace;
4511 struct tracer_flags *tracer_flags = trace->flags;
4512 struct tracer_opt *opts = NULL;
4513 int i;
4514
4515 for (i = 0; tracer_flags->opts[i].name; i++) {
4516 opts = &tracer_flags->opts[i];
4517
4518 if (strcmp(cmp, opts->name) == 0)
4519 return __set_tracer_option(tr, trace->flags, opts, neg);
4520 }
4521
4522 return -EINVAL;
4523 }
4524
4525 /* Some tracers require overwrite to stay enabled */
4526 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4527 {
4528 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4529 return -1;
4530
4531 return 0;
4532 }
4533
4534 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4535 {
4536 /* do nothing if flag is already set */
4537 if (!!(tr->trace_flags & mask) == !!enabled)
4538 return 0;
4539
4540 /* Give the tracer a chance to approve the change */
4541 if (tr->current_trace->flag_changed)
4542 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4543 return -EINVAL;
4544
4545 if (enabled)
4546 tr->trace_flags |= mask;
4547 else
4548 tr->trace_flags &= ~mask;
4549
4550 if (mask == TRACE_ITER_RECORD_CMD)
4551 trace_event_enable_cmd_record(enabled);
4552
4553 if (mask == TRACE_ITER_RECORD_TGID) {
4554 if (!tgid_map)
4555 tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4556 sizeof(*tgid_map),
4557 GFP_KERNEL);
4558 if (!tgid_map) {
4559 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4560 return -ENOMEM;
4561 }
4562
4563 trace_event_enable_tgid_record(enabled);
4564 }
4565
4566 if (mask == TRACE_ITER_EVENT_FORK)
4567 trace_event_follow_fork(tr, enabled);
4568
4569 if (mask == TRACE_ITER_FUNC_FORK)
4570 ftrace_pid_follow_fork(tr, enabled);
4571
4572 if (mask == TRACE_ITER_OVERWRITE) {
4573 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4574 #ifdef CONFIG_TRACER_MAX_TRACE
4575 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4576 #endif
4577 }
4578
4579 if (mask == TRACE_ITER_PRINTK) {
4580 trace_printk_start_stop_comm(enabled);
4581 trace_printk_control(enabled);
4582 }
4583
4584 return 0;
4585 }
4586
4587 static int trace_set_options(struct trace_array *tr, char *option)
4588 {
4589 char *cmp;
4590 int neg = 0;
4591 int ret;
4592 size_t orig_len = strlen(option);
4593 int len;
4594
4595 cmp = strstrip(option);
4596
4597 len = str_has_prefix(cmp, "no");
4598 if (len)
4599 neg = 1;
4600
4601 cmp += len;
4602
4603 mutex_lock(&trace_types_lock);
4604
4605 ret = match_string(trace_options, -1, cmp);
4606 /* If no option could be set, test the specific tracer options */
4607 if (ret < 0)
4608 ret = set_tracer_option(tr, cmp, neg);
4609 else
4610 ret = set_tracer_flag(tr, 1 << ret, !neg);
4611
4612 mutex_unlock(&trace_types_lock);
4613
4614 /*
4615 * If the first trailing whitespace is replaced with '\0' by strstrip,
4616 * turn it back into a space.
4617 */
4618 if (orig_len > strlen(option))
4619 option[strlen(option)] = ' ';
4620
4621 return ret;
4622 }
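/*
 * Illustrative sketch (not taken from this file): trace_set_options()
 * takes a single option name, with a "no" prefix to clear it
 * (str_has_prefix() above). Through the trace_options file that could
 * look like (option names here are examples only):
 *
 *   # echo overwrite   > /sys/kernel/tracing/trace_options
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 */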
4623
4624 static void __init apply_trace_boot_options(void)
4625 {
4626 char *buf = trace_boot_options_buf;
4627 char *option;
4628
4629 while (true) {
4630 option = strsep(&buf, ",");
4631
4632 if (!option)
4633 break;
4634
4635 if (*option)
4636 trace_set_options(&global_trace, option);
4637
4638 /* Put back the comma to allow this to be called again */
4639 if (buf)
4640 *(buf - 1) = ',';
4641 }
4642 }
4643
4644 static ssize_t
4645 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4646 size_t cnt, loff_t *ppos)
4647 {
4648 struct seq_file *m = filp->private_data;
4649 struct trace_array *tr = m->private;
4650 char buf[64];
4651 int ret;
4652
4653 if (cnt >= sizeof(buf))
4654 return -EINVAL;
4655
4656 if (copy_from_user(buf, ubuf, cnt))
4657 return -EFAULT;
4658
4659 buf[cnt] = 0;
4660
4661 ret = trace_set_options(tr, buf);
4662 if (ret < 0)
4663 return ret;
4664
4665 *ppos += cnt;
4666
4667 return cnt;
4668 }
4669
4670 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4671 {
4672 struct trace_array *tr = inode->i_private;
4673 int ret;
4674
4675 if (tracing_disabled)
4676 return -ENODEV;
4677
4678 if (trace_array_get(tr) < 0)
4679 return -ENODEV;
4680
4681 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4682 if (ret < 0)
4683 trace_array_put(tr);
4684
4685 return ret;
4686 }
4687
4688 static const struct file_operations tracing_iter_fops = {
4689 .open = tracing_trace_options_open,
4690 .read = seq_read,
4691 .llseek = seq_lseek,
4692 .release = tracing_single_release_tr,
4693 .write = tracing_trace_options_write,
4694 };
4695
4696 static const char readme_msg[] =
4697 "tracing mini-HOWTO:\n\n"
4698 "# echo 0 > tracing_on : quick way to disable tracing\n"
4699 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4700 " Important files:\n"
4701 " trace\t\t\t- The static contents of the buffer\n"
4702 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4703 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4704 " current_tracer\t- function and latency tracers\n"
4705 " available_tracers\t- list of configured tracers for current_tracer\n"
4706 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4707 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4708 " trace_clock\t\t-change the clock used to order events\n"
4709 " local: Per cpu clock but may not be synced across CPUs\n"
4710 " global: Synced across CPUs but slows tracing down.\n"
4711 " counter: Not a clock, but just an increment\n"
4712 " uptime: Jiffy counter from time of boot\n"
4713 " perf: Same clock that perf events use\n"
4714 #ifdef CONFIG_X86_64
4715 " x86-tsc: TSC cycle counter\n"
4716 #endif
4717 "\n timestamp_mode\t-view the mode used to timestamp events\n"
4718 " delta: Delta difference against a buffer-wide timestamp\n"
4719 " absolute: Absolute (standalone) timestamp\n"
4720 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4721 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4722 " tracing_cpumask\t- Limit which CPUs to trace\n"
4723 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4724 "\t\t\t Remove sub-buffer with rmdir\n"
4725 " trace_options\t\t- Set format or modify how tracing happens\n"
4726 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4727 "\t\t\t option name\n"
4728 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4729 #ifdef CONFIG_DYNAMIC_FTRACE
4730 "\n available_filter_functions - list of functions that can be filtered on\n"
4731 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4732 "\t\t\t functions\n"
4733 "\t accepts: func_full_name or glob-matching-pattern\n"
4734 "\t modules: Can select a group via module\n"
4735 "\t Format: :mod:<module-name>\n"
4736 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4737 "\t triggers: a command to perform when function is hit\n"
4738 "\t Format: <function>:<trigger>[:count]\n"
4739 "\t trigger: traceon, traceoff\n"
4740 "\t\t enable_event:<system>:<event>\n"
4741 "\t\t disable_event:<system>:<event>\n"
4742 #ifdef CONFIG_STACKTRACE
4743 "\t\t stacktrace\n"
4744 #endif
4745 #ifdef CONFIG_TRACER_SNAPSHOT
4746 "\t\t snapshot\n"
4747 #endif
4748 "\t\t dump\n"
4749 "\t\t cpudump\n"
4750 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4751 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4752 "\t The first one will disable tracing every time do_fault is hit\n"
4753 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4754 "\t The first time do trap is hit and it disables tracing, the\n"
4755 "\t counter will decrement to 2. If tracing is already disabled,\n"
4756 "\t the counter will not decrement. It only decrements when the\n"
4757 "\t trigger did work\n"
4758 "\t To remove trigger without count:\n"
4759 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4760 "\t To remove trigger with a count:\n"
4761 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4762 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4763 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4764 "\t modules: Can select a group via module command :mod:\n"
4765 "\t Does not accept triggers\n"
4766 #endif /* CONFIG_DYNAMIC_FTRACE */
4767 #ifdef CONFIG_FUNCTION_TRACER
4768 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4769 "\t\t (function)\n"
4770 #endif
4771 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4772 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4773 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4774 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4775 #endif
4776 #ifdef CONFIG_TRACER_SNAPSHOT
4777 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4778 "\t\t\t snapshot buffer. Read the contents for more\n"
4779 "\t\t\t information\n"
4780 #endif
4781 #ifdef CONFIG_STACK_TRACER
4782 " stack_trace\t\t- Shows the max stack trace when active\n"
4783 " stack_max_size\t- Shows current max stack size that was traced\n"
4784 "\t\t\t Write into this file to reset the max size (trigger a\n"
4785 "\t\t\t new trace)\n"
4786 #ifdef CONFIG_DYNAMIC_FTRACE
4787 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4788 "\t\t\t traces\n"
4789 #endif
4790 #endif /* CONFIG_STACK_TRACER */
4791 #ifdef CONFIG_DYNAMIC_EVENTS
4792 " dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4793 "\t\t\t Write into this file to define/undefine new trace events.\n"
4794 #endif
4795 #ifdef CONFIG_KPROBE_EVENTS
4796 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4797 "\t\t\t Write into this file to define/undefine new trace events.\n"
4798 #endif
4799 #ifdef CONFIG_UPROBE_EVENTS
4800 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4801 "\t\t\t Write into this file to define/undefine new trace events.\n"
4802 #endif
4803 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4804 "\t accepts: event-definitions (one definition per line)\n"
4805 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
4806 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4807 #ifdef CONFIG_HIST_TRIGGERS
4808 "\t s:[synthetic/]<event> <field> [<field>]\n"
4809 #endif
4810 "\t -:[<group>/]<event>\n"
4811 #ifdef CONFIG_KPROBE_EVENTS
4812 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4813 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4814 #endif
4815 #ifdef CONFIG_UPROBE_EVENTS
4816 " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4817 #endif
4818 "\t args: <name>=fetcharg[:type]\n"
4819 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4820 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4821 "\t $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4822 #else
4823 "\t $stack<index>, $stack, $retval, $comm\n"
4824 #endif
4825 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4826 "\t b<bit-width>@<bit-offset>/<container-size>,\n"
4827 "\t <type>\\[<array-size>\\]\n"
4828 #ifdef CONFIG_HIST_TRIGGERS
4829 "\t field: <stype> <name>;\n"
4830 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4831 "\t [unsigned] char/int/long\n"
4832 #endif
4833 #endif
4834 " events/\t\t- Directory containing all trace event subsystems:\n"
4835 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4836 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4837 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4838 "\t\t\t events\n"
4839 " filter\t\t- If set, only events passing filter are traced\n"
4840 " events/<system>/<event>/\t- Directory containing control files for\n"
4841 "\t\t\t <event>:\n"
4842 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4843 " filter\t\t- If set, only events passing filter are traced\n"
4844 " trigger\t\t- If set, a command to perform when event is hit\n"
4845 "\t Format: <trigger>[:count][if <filter>]\n"
4846 "\t trigger: traceon, traceoff\n"
4847 "\t enable_event:<system>:<event>\n"
4848 "\t disable_event:<system>:<event>\n"
4849 #ifdef CONFIG_HIST_TRIGGERS
4850 "\t enable_hist:<system>:<event>\n"
4851 "\t disable_hist:<system>:<event>\n"
4852 #endif
4853 #ifdef CONFIG_STACKTRACE
4854 "\t\t stacktrace\n"
4855 #endif
4856 #ifdef CONFIG_TRACER_SNAPSHOT
4857 "\t\t snapshot\n"
4858 #endif
4859 #ifdef CONFIG_HIST_TRIGGERS
4860 "\t\t hist (see below)\n"
4861 #endif
4862 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4863 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4864 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4865 "\t events/block/block_unplug/trigger\n"
4866 "\t The first disables tracing every time block_unplug is hit.\n"
4867 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4868 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4869 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4870 "\t Like function triggers, the counter is only decremented if it\n"
4871 "\t enabled or disabled tracing.\n"
4872 "\t To remove a trigger without a count:\n"
4873 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4874 "\t To remove a trigger with a count:\n"
4875 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4876 "\t Filters can be ignored when removing a trigger.\n"
4877 #ifdef CONFIG_HIST_TRIGGERS
4878 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4879 "\t Format: hist:keys=<field1[,field2,...]>\n"
4880 "\t [:values=<field1[,field2,...]>]\n"
4881 "\t [:sort=<field1[,field2,...]>]\n"
4882 "\t [:size=#entries]\n"
4883 "\t [:pause][:continue][:clear]\n"
4884 "\t [:name=histname1]\n"
4885 "\t [:<handler>.<action>]\n"
4886 "\t [if <filter>]\n\n"
4887 "\t When a matching event is hit, an entry is added to a hash\n"
4888 "\t table using the key(s) and value(s) named, and the value of a\n"
4889 "\t sum called 'hitcount' is incremented. Keys and values\n"
4890 "\t correspond to fields in the event's format description. Keys\n"
4891 "\t can be any field, or the special string 'stacktrace'.\n"
4892 "\t Compound keys consisting of up to two fields can be specified\n"
4893 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4894 "\t fields. Sort keys consisting of up to two fields can be\n"
4895 "\t specified using the 'sort' keyword. The sort direction can\n"
4896 "\t be modified by appending '.descending' or '.ascending' to a\n"
4897 "\t sort field. The 'size' parameter can be used to specify more\n"
4898 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4899 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4900 "\t its histogram data will be shared with other triggers of the\n"
4901 "\t same name, and trigger hits will update this common data.\n\n"
4902 "\t Reading the 'hist' file for the event will dump the hash\n"
4903 "\t table in its entirety to stdout. If there are multiple hist\n"
4904 "\t triggers attached to an event, there will be a table for each\n"
4905 "\t trigger in the output. The table displayed for a named\n"
4906 "\t trigger will be the same as any other instance having the\n"
4907 "\t same name. The default format used to display a given field\n"
4908 "\t can be modified by appending any of the following modifiers\n"
4909 "\t to the field name, as applicable:\n\n"
4910 "\t .hex display a number as a hex value\n"
4911 "\t .sym display an address as a symbol\n"
4912 "\t .sym-offset display an address as a symbol and offset\n"
4913 "\t .execname display a common_pid as a program name\n"
4914 "\t .syscall display a syscall id as a syscall name\n"
4915 "\t .log2 display log2 value rather than raw number\n"
4916 "\t .usecs display a common_timestamp in microseconds\n\n"
4917 "\t The 'pause' parameter can be used to pause an existing hist\n"
4918 "\t trigger or to start a hist trigger but not log any events\n"
4919 "\t until told to do so. 'continue' can be used to start or\n"
4920 "\t restart a paused hist trigger.\n\n"
4921 "\t The 'clear' parameter will clear the contents of a running\n"
4922 "\t hist trigger and leave its current paused/active state\n"
4923 "\t unchanged.\n\n"
4924 "\t The enable_hist and disable_hist triggers can be used to\n"
4925 "\t have one event conditionally start and stop another event's\n"
4926 "\t already-attached hist trigger. The syntax is analogous to\n"
4927 "\t the enable_event and disable_event triggers.\n\n"
4928 "\t Hist trigger handlers and actions are executed whenever a\n"
4929 "\t a histogram entry is added or updated. They take the form:\n\n"
4930 "\t <handler>.<action>\n\n"
4931 "\t The available handlers are:\n\n"
4932 "\t onmatch(matching.event) - invoke on addition or update\n"
4933 "\t onmax(var) - invoke if var exceeds current max\n"
4934 "\t onchange(var) - invoke action if var changes\n\n"
4935 "\t The available actions are:\n\n"
4936 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
4937 "\t save(field,...) - save current event fields\n"
4938 #ifdef CONFIG_TRACER_SNAPSHOT
4939 "\t snapshot() - snapshot the trace buffer\n"
4940 #endif
4941 #endif
4942 ;
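/*
 * Illustrative sketch (not taken from this file): one concrete hist
 * trigger following the format documented in readme_msg above -- the
 * event and field names are examples, any event with a numeric field
 * works:
 *
 *   # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending:size=4096' \
 *       > /sys/kernel/tracing/events/kmem/kmalloc/trigger
 *   # cat /sys/kernel/tracing/events/kmem/kmalloc/hist
 */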
4943
4944 static ssize_t
4945 tracing_readme_read(struct file *filp, char __user *ubuf,
4946 size_t cnt, loff_t *ppos)
4947 {
4948 return simple_read_from_buffer(ubuf, cnt, ppos,
4949 readme_msg, strlen(readme_msg));
4950 }
4951
4952 static const struct file_operations tracing_readme_fops = {
4953 .open = tracing_open_generic,
4954 .read = tracing_readme_read,
4955 .llseek = generic_file_llseek,
4956 };
4957
4958 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4959 {
4960 int *ptr = v;
4961
4962 if (*pos || m->count)
4963 ptr++;
4964
4965 (*pos)++;
4966
4967 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4968 if (trace_find_tgid(*ptr))
4969 return ptr;
4970 }
4971
4972 return NULL;
4973 }
4974
4975 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4976 {
4977 void *v;
4978 loff_t l = 0;
4979
4980 if (!tgid_map)
4981 return NULL;
4982
4983 v = &tgid_map[0];
4984 while (l <= *pos) {
4985 v = saved_tgids_next(m, v, &l);
4986 if (!v)
4987 return NULL;
4988 }
4989
4990 return v;
4991 }
4992
4993 static void saved_tgids_stop(struct seq_file *m, void *v)
4994 {
4995 }
4996
4997 static int saved_tgids_show(struct seq_file *m, void *v)
4998 {
4999 int pid = (int *)v - tgid_map;
5000
5001 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5002 return 0;
5003 }
5004
5005 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5006 .start = saved_tgids_start,
5007 .stop = saved_tgids_stop,
5008 .next = saved_tgids_next,
5009 .show = saved_tgids_show,
5010 };
5011
5012 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5013 {
5014 if (tracing_disabled)
5015 return -ENODEV;
5016
5017 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5018 }
5019
5020
5021 static const struct file_operations tracing_saved_tgids_fops = {
5022 .open = tracing_saved_tgids_open,
5023 .read = seq_read,
5024 .llseek = seq_lseek,
5025 .release = seq_release,
5026 };
5027
5028 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5029 {
5030 unsigned int *ptr = v;
5031
5032 if (*pos || m->count)
5033 ptr++;
5034
5035 (*pos)++;
5036
5037 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5038 ptr++) {
5039 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5040 continue;
5041
5042 return ptr;
5043 }
5044
5045 return NULL;
5046 }
5047
5048 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5049 {
5050 void *v;
5051 loff_t l = 0;
5052
5053 preempt_disable();
5054 arch_spin_lock(&trace_cmdline_lock);
5055
5056 v = &savedcmd->map_cmdline_to_pid[0];
5057 while (l <= *pos) {
5058 v = saved_cmdlines_next(m, v, &l);
5059 if (!v)
5060 return NULL;
5061 }
5062
5063 return v;
5064 }
5065
5066 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5067 {
5068 arch_spin_unlock(&trace_cmdline_lock);
5069 preempt_enable();
5070 }
5071
5072 static int saved_cmdlines_show(struct seq_file *m, void *v)
5073 {
5074 char buf[TASK_COMM_LEN];
5075 unsigned int *pid = v;
5076
5077 __trace_find_cmdline(*pid, buf);
5078 seq_printf(m, "%d %s\n", *pid, buf);
5079 return 0;
5080 }
5081
5082 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5083 .start = saved_cmdlines_start,
5084 .next = saved_cmdlines_next,
5085 .stop = saved_cmdlines_stop,
5086 .show = saved_cmdlines_show,
5087 };
5088
5089 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5090 {
5091 if (tracing_disabled)
5092 return -ENODEV;
5093
5094 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5095 }
5096
5097 static const struct file_operations tracing_saved_cmdlines_fops = {
5098 .open = tracing_saved_cmdlines_open,
5099 .read = seq_read,
5100 .llseek = seq_lseek,
5101 .release = seq_release,
5102 };
5103
5104 static ssize_t
5105 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5106 size_t cnt, loff_t *ppos)
5107 {
5108 char buf[64];
5109 int r;
5110
5111 arch_spin_lock(&trace_cmdline_lock);
5112 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5113 arch_spin_unlock(&trace_cmdline_lock);
5114
5115 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5116 }
5117
5118 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5119 {
5120 kfree(s->saved_cmdlines);
5121 kfree(s->map_cmdline_to_pid);
5122 kfree(s);
5123 }
5124
5125 static int tracing_resize_saved_cmdlines(unsigned int val)
5126 {
5127 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5128
5129 s = kmalloc(sizeof(*s), GFP_KERNEL);
5130 if (!s)
5131 return -ENOMEM;
5132
5133 if (allocate_cmdlines_buffer(val, s) < 0) {
5134 kfree(s);
5135 return -ENOMEM;
5136 }
5137
5138 arch_spin_lock(&trace_cmdline_lock);
5139 savedcmd_temp = savedcmd;
5140 savedcmd = s;
5141 arch_spin_unlock(&trace_cmdline_lock);
5142 free_saved_cmdlines_buffer(savedcmd_temp);
5143
5144 return 0;
5145 }
5146
5147 static ssize_t
5148 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5149 size_t cnt, loff_t *ppos)
5150 {
5151 unsigned long val;
5152 int ret;
5153
5154 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5155 if (ret)
5156 return ret;
5157
5158 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5159 if (!val || val > PID_MAX_DEFAULT)
5160 return -EINVAL;
5161
5162 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5163 if (ret < 0)
5164 return ret;
5165
5166 *ppos += cnt;
5167
5168 return cnt;
5169 }
5170
5171 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5172 .open = tracing_open_generic,
5173 .read = tracing_saved_cmdlines_size_read,
5174 .write = tracing_saved_cmdlines_size_write,
5175 };
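/*
 * Illustrative sketch (not taken from this file): the write handler above
 * accepts a decimal entry count between 1 and PID_MAX_DEFAULT, so growing
 * the saved comm/pid cache could look like:
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */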
5176
5177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5178 static union trace_eval_map_item *
5179 update_eval_map(union trace_eval_map_item *ptr)
5180 {
5181 if (!ptr->map.eval_string) {
5182 if (ptr->tail.next) {
5183 ptr = ptr->tail.next;
5184 /* Set ptr to the next real item (skip head) */
5185 ptr++;
5186 } else
5187 return NULL;
5188 }
5189 return ptr;
5190 }
5191
5192 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5193 {
5194 union trace_eval_map_item *ptr = v;
5195
5196 /*
5197 * Paranoid! If ptr points to end, we don't want to increment past it.
5198 * This really should never happen.
5199 */
5200 ptr = update_eval_map(ptr);
5201 if (WARN_ON_ONCE(!ptr))
5202 return NULL;
5203
5204 ptr++;
5205
5206 (*pos)++;
5207
5208 ptr = update_eval_map(ptr);
5209
5210 return ptr;
5211 }
5212
5213 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5214 {
5215 union trace_eval_map_item *v;
5216 loff_t l = 0;
5217
5218 mutex_lock(&trace_eval_mutex);
5219
5220 v = trace_eval_maps;
5221 if (v)
5222 v++;
5223
5224 while (v && l < *pos) {
5225 v = eval_map_next(m, v, &l);
5226 }
5227
5228 return v;
5229 }
5230
5231 static void eval_map_stop(struct seq_file *m, void *v)
5232 {
5233 mutex_unlock(&trace_eval_mutex);
5234 }
5235
5236 static int eval_map_show(struct seq_file *m, void *v)
5237 {
5238 union trace_eval_map_item *ptr = v;
5239
5240 seq_printf(m, "%s %ld (%s)\n",
5241 ptr->map.eval_string, ptr->map.eval_value,
5242 ptr->map.system);
5243
5244 return 0;
5245 }
5246
5247 static const struct seq_operations tracing_eval_map_seq_ops = {
5248 .start = eval_map_start,
5249 .next = eval_map_next,
5250 .stop = eval_map_stop,
5251 .show = eval_map_show,
5252 };
5253
5254 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5255 {
5256 if (tracing_disabled)
5257 return -ENODEV;
5258
5259 return seq_open(filp, &tracing_eval_map_seq_ops);
5260 }
5261
5262 static const struct file_operations tracing_eval_map_fops = {
5263 .open = tracing_eval_map_open,
5264 .read = seq_read,
5265 .llseek = seq_lseek,
5266 .release = seq_release,
5267 };
5268
5269 static inline union trace_eval_map_item *
5270 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5271 {
5272 /* Return tail of array given the head */
5273 return ptr + ptr->head.length + 1;
5274 }
5275
5276 static void
5277 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5278 int len)
5279 {
5280 struct trace_eval_map **stop;
5281 struct trace_eval_map **map;
5282 union trace_eval_map_item *map_array;
5283 union trace_eval_map_item *ptr;
5284
5285 stop = start + len;
5286
5287 /*
5288 * The trace_eval_maps contains the map plus a head and tail item,
5289 * where the head holds the module and length of array, and the
5290 * tail holds a pointer to the next list.
5291 */
5292 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5293 if (!map_array) {
5294 pr_warn("Unable to allocate trace eval mapping\n");
5295 return;
5296 }
5297
5298 mutex_lock(&trace_eval_mutex);
5299
5300 if (!trace_eval_maps)
5301 trace_eval_maps = map_array;
5302 else {
5303 ptr = trace_eval_maps;
5304 for (;;) {
5305 ptr = trace_eval_jmp_to_tail(ptr);
5306 if (!ptr->tail.next)
5307 break;
5308 ptr = ptr->tail.next;
5309
5310 }
5311 ptr->tail.next = map_array;
5312 }
5313 map_array->head.mod = mod;
5314 map_array->head.length = len;
5315 map_array++;
5316
5317 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5318 map_array->map = **map;
5319 map_array++;
5320 }
5321 memset(map_array, 0, sizeof(*map_array));
5322
5323 mutex_unlock(&trace_eval_mutex);
5324 }
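/*
 * Layout sketch of the array built above for a module contributing N eval
 * maps (kmalloc_array(len + 2, ...)):
 *
 *   [ head: mod, length = N ][ map 0 ] ... [ map N-1 ][ zeroed terminator ]
 *
 * The zeroed terminator is the "tail" that trace_eval_jmp_to_tail() lands
 * on; its ->tail.next later links in the next module's array.
 */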
5325
5326 static void trace_create_eval_file(struct dentry *d_tracer)
5327 {
5328 trace_create_file("eval_map", 0444, d_tracer,
5329 NULL, &tracing_eval_map_fops);
5330 }
5331
5332 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5333 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5334 static inline void trace_insert_eval_map_file(struct module *mod,
5335 struct trace_eval_map **start, int len) { }
5336 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5337
5338 static void trace_insert_eval_map(struct module *mod,
5339 struct trace_eval_map **start, int len)
5340 {
5341 struct trace_eval_map **map;
5342
5343 if (len <= 0)
5344 return;
5345
5346 map = start;
5347
5348 trace_event_eval_update(map, len);
5349
5350 trace_insert_eval_map_file(mod, start, len);
5351 }
5352
5353 static ssize_t
5354 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5355 size_t cnt, loff_t *ppos)
5356 {
5357 struct trace_array *tr = filp->private_data;
5358 char buf[MAX_TRACER_SIZE+2];
5359 int r;
5360
5361 mutex_lock(&trace_types_lock);
5362 r = sprintf(buf, "%s\n", tr->current_trace->name);
5363 mutex_unlock(&trace_types_lock);
5364
5365 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5366 }
5367
5368 int tracer_init(struct tracer *t, struct trace_array *tr)
5369 {
5370 tracing_reset_online_cpus(&tr->trace_buffer);
5371 return t->init(tr);
5372 }
5373
5374 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5375 {
5376 int cpu;
5377
5378 for_each_tracing_cpu(cpu)
5379 per_cpu_ptr(buf->data, cpu)->entries = val;
5380 }
5381
5382 #ifdef CONFIG_TRACER_MAX_TRACE
5383 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5384 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5385 struct trace_buffer *size_buf, int cpu_id)
5386 {
5387 int cpu, ret = 0;
5388
5389 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5390 for_each_tracing_cpu(cpu) {
5391 ret = ring_buffer_resize(trace_buf->buffer,
5392 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5393 if (ret < 0)
5394 break;
5395 per_cpu_ptr(trace_buf->data, cpu)->entries =
5396 per_cpu_ptr(size_buf->data, cpu)->entries;
5397 }
5398 } else {
5399 ret = ring_buffer_resize(trace_buf->buffer,
5400 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5401 if (ret == 0)
5402 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5403 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5404 }
5405
5406 return ret;
5407 }
5408 #endif /* CONFIG_TRACER_MAX_TRACE */
5409
5410 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5411 unsigned long size, int cpu)
5412 {
5413 int ret;
5414
5415 /*
5416 * If the kernel or a user changes the size of the ring buffer,
5417 * we use the size that was given, and we can forget about
5418 * expanding it later.
5419 */
5420 ring_buffer_expanded = true;
5421
5422 /* May be called before buffers are initialized */
5423 if (!tr->trace_buffer.buffer)
5424 return 0;
5425
5426 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5427 if (ret < 0)
5428 return ret;
5429
5430 #ifdef CONFIG_TRACER_MAX_TRACE
5431 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5432 !tr->current_trace->use_max_tr)
5433 goto out;
5434
5435 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5436 if (ret < 0) {
5437 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5438 &tr->trace_buffer, cpu);
5439 if (r < 0) {
5440 /*
5441 * AARGH! We are left with a max buffer of a
5442 * different size!!!!
5443 * The max buffer is our "snapshot" buffer.
5444 * When a tracer needs a snapshot (one of the
5445 * latency tracers), it swaps the max buffer
5446 * with the saved snapshot. We succeeded in
5447 * updating the size of the main buffer, but failed to
5448 * update the size of the max buffer. But when we tried
5449 * to reset the main buffer to the original size, we
5450 * failed there too. This is very unlikely to
5451 * happen, but if it does, warn and kill all
5452 * tracing.
5453 */
5454 WARN_ON(1);
5455 tracing_disabled = 1;
5456 }
5457 return ret;
5458 }
5459
5460 if (cpu == RING_BUFFER_ALL_CPUS)
5461 set_buffer_entries(&tr->max_buffer, size);
5462 else
5463 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5464
5465 out:
5466 #endif /* CONFIG_TRACER_MAX_TRACE */
5467
5468 if (cpu == RING_BUFFER_ALL_CPUS)
5469 set_buffer_entries(&tr->trace_buffer, size);
5470 else
5471 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5472
5473 return ret;
5474 }
5475
5476 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5477 unsigned long size, int cpu_id)
5478 {
5479 int ret = size;
5480
5481 mutex_lock(&trace_types_lock);
5482
5483 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5484 /* make sure this cpu is enabled in the mask */
5485 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5486 ret = -EINVAL;
5487 goto out;
5488 }
5489 }
5490
5491 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5492 if (ret < 0)
5493 ret = -ENOMEM;
5494
5495 out:
5496 mutex_unlock(&trace_types_lock);
5497
5498 return ret;
5499 }
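/*
 * Illustrative sketch (not taken from this file): the buffer_size_kb
 * files created elsewhere in this file are assumed to funnel into
 * tracing_resize_ring_buffer(); resizing all CPUs or a single CPU could
 * then look like:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 */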
5500
5501
5502 /**
5503 * tracing_update_buffers - used by tracing facility to expand ring buffers
5504 *
5505 * To save memory when tracing is never used on a system that has it
5506 * configured in, the ring buffers are set to a minimum size. But once
5507 * a user starts to use the tracing facility, the buffers need to grow
5508 * to their default size.
5509 *
5510 * This function is to be called when a tracer is about to be used.
5511 */
5512 int tracing_update_buffers(void)
5513 {
5514 int ret = 0;
5515
5516 mutex_lock(&trace_types_lock);
5517 if (!ring_buffer_expanded)
5518 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5519 RING_BUFFER_ALL_CPUS);
5520 mutex_unlock(&trace_types_lock);
5521
5522 return ret;
5523 }
5524
5525 struct trace_option_dentry;
5526
5527 static void
5528 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5529
5530 /*
5531 * Used to clear out the tracer before deletion of an instance.
5532 * Must have trace_types_lock held.
5533 */
5534 static void tracing_set_nop(struct trace_array *tr)
5535 {
5536 if (tr->current_trace == &nop_trace)
5537 return;
5538
5539 tr->current_trace->enabled--;
5540
5541 if (tr->current_trace->reset)
5542 tr->current_trace->reset(tr);
5543
5544 tr->current_trace = &nop_trace;
5545 }
5546
5547 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5548 {
5549 /* Only enable if the directory has been created already. */
5550 if (!tr->dir)
5551 return;
5552
5553 create_trace_option_files(tr, t);
5554 }
5555
5556 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5557 {
5558 struct tracer *t;
5559 #ifdef CONFIG_TRACER_MAX_TRACE
5560 bool had_max_tr;
5561 #endif
5562 int ret = 0;
5563
5564 mutex_lock(&trace_types_lock);
5565
5566 if (!ring_buffer_expanded) {
5567 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5568 RING_BUFFER_ALL_CPUS);
5569 if (ret < 0)
5570 goto out;
5571 ret = 0;
5572 }
5573
5574 for (t = trace_types; t; t = t->next) {
5575 if (strcmp(t->name, buf) == 0)
5576 break;
5577 }
5578 if (!t) {
5579 ret = -EINVAL;
5580 goto out;
5581 }
5582 if (t == tr->current_trace)
5583 goto out;
5584
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586 if (t->use_max_tr) {
5587 arch_spin_lock(&tr->max_lock);
5588 if (tr->cond_snapshot)
5589 ret = -EBUSY;
5590 arch_spin_unlock(&tr->max_lock);
5591 if (ret)
5592 goto out;
5593 }
5594 #endif
5595 /* Some tracers won't work on kernel command line */
5596 if (system_state < SYSTEM_RUNNING && t->noboot) {
5597 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5598 t->name);
5599 goto out;
5600 }
5601
5602 /* Some tracers are only allowed for the top level buffer */
5603 if (!trace_ok_for_array(t, tr)) {
5604 ret = -EINVAL;
5605 goto out;
5606 }
5607
5608 /* If trace pipe files are being read, we can't change the tracer */
5609 if (tr->current_trace->ref) {
5610 ret = -EBUSY;
5611 goto out;
5612 }
5613
5614 trace_branch_disable();
5615
5616 tr->current_trace->enabled--;
5617
5618 if (tr->current_trace->reset)
5619 tr->current_trace->reset(tr);
5620
5621 /* Current trace needs to be nop_trace before synchronize_rcu */
5622 tr->current_trace = &nop_trace;
5623
5624 #ifdef CONFIG_TRACER_MAX_TRACE
5625 had_max_tr = tr->allocated_snapshot;
5626
5627 if (had_max_tr && !t->use_max_tr) {
5628 /*
5629 * We need to make sure that the update_max_tr sees that
5630 * current_trace changed to nop_trace to keep it from
5631 * swapping the buffers after we resize it.
5632 * update_max_tr() is called with interrupts disabled,
5633 * so a synchronize_rcu() is sufficient.
5634 */
5635 synchronize_rcu();
5636 free_snapshot(tr);
5637 }
5638 #endif
5639
5640 #ifdef CONFIG_TRACER_MAX_TRACE
5641 if (t->use_max_tr && !had_max_tr) {
5642 ret = tracing_alloc_snapshot_instance(tr);
5643 if (ret < 0)
5644 goto out;
5645 }
5646 #endif
5647
5648 if (t->init) {
5649 ret = tracer_init(t, tr);
5650 if (ret)
5651 goto out;
5652 }
5653
5654 tr->current_trace = t;
5655 tr->current_trace->enabled++;
5656 trace_branch_enable(tr);
5657 out:
5658 mutex_unlock(&trace_types_lock);
5659
5660 return ret;
5661 }
5662
5663 static ssize_t
5664 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5665 size_t cnt, loff_t *ppos)
5666 {
5667 struct trace_array *tr = filp->private_data;
5668 char buf[MAX_TRACER_SIZE+1];
5669 int i;
5670 size_t ret;
5671 int err;
5672
5673 ret = cnt;
5674
5675 if (cnt > MAX_TRACER_SIZE)
5676 cnt = MAX_TRACER_SIZE;
5677
5678 if (copy_from_user(buf, ubuf, cnt))
5679 return -EFAULT;
5680
5681 buf[cnt] = 0;
5682
5683 /* strip ending whitespace. */
5684 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5685 buf[i] = 0;
5686
5687 err = tracing_set_tracer(tr, buf);
5688 if (err)
5689 return err;
5690
5691 *ppos += ret;
5692
5693 return ret;
5694 }
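/*
 * Illustrative sketch (not taken from this file): tracing_set_trace_write()
 * strips trailing whitespace, so plain echo works; tracer names below are
 * examples, see available_tracers for what a given kernel offers:
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 */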
5695
5696 static ssize_t
5697 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5698 size_t cnt, loff_t *ppos)
5699 {
5700 char buf[64];
5701 int r;
5702
5703 r = snprintf(buf, sizeof(buf), "%ld\n",
5704 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5705 if (r > sizeof(buf))
5706 r = sizeof(buf);
5707 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5708 }
5709
5710 static ssize_t
5711 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5712 size_t cnt, loff_t *ppos)
5713 {
5714 unsigned long val;
5715 int ret;
5716
5717 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5718 if (ret)
5719 return ret;
5720
5721 *ptr = val * 1000;
5722
5723 return cnt;
5724 }
5725
5726 static ssize_t
5727 tracing_thresh_read(struct file *filp, char __user *ubuf,
5728 size_t cnt, loff_t *ppos)
5729 {
5730 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5731 }
5732
5733 static ssize_t
5734 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5735 size_t cnt, loff_t *ppos)
5736 {
5737 struct trace_array *tr = filp->private_data;
5738 int ret;
5739
5740 mutex_lock(&trace_types_lock);
5741 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5742 if (ret < 0)
5743 goto out;
5744
5745 if (tr->current_trace->update_thresh) {
5746 ret = tr->current_trace->update_thresh(tr);
5747 if (ret < 0)
5748 goto out;
5749 }
5750
5751 ret = cnt;
5752 out:
5753 mutex_unlock(&trace_types_lock);
5754
5755 return ret;
5756 }
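/*
 * Worked example of the unit handling above: tracing_nsecs_write()
 * multiplies the written value by 1000 and tracing_nsecs_read() converts
 * back with nsecs_to_usecs(), so tracing_thresh is exposed in microseconds
 * while being stored in nanoseconds (tracefs path assumed as in the
 * mini-HOWTO above):
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh   # stored as 100000 ns
 *   # cat /sys/kernel/tracing/tracing_thresh          # -> 100
 */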
5757
5758 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5759
5760 static ssize_t
5761 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5762 size_t cnt, loff_t *ppos)
5763 {
5764 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5765 }
5766
5767 static ssize_t
5768 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5769 size_t cnt, loff_t *ppos)
5770 {
5771 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5772 }
5773
5774 #endif
5775
5776 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5777 {
5778 struct trace_array *tr = inode->i_private;
5779 struct trace_iterator *iter;
5780 int ret = 0;
5781
5782 if (tracing_disabled)
5783 return -ENODEV;
5784
5785 if (trace_array_get(tr) < 0)
5786 return -ENODEV;
5787
5788 mutex_lock(&trace_types_lock);
5789
5790 /* create a buffer to store the information to pass to userspace */
5791 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5792 if (!iter) {
5793 ret = -ENOMEM;
5794 __trace_array_put(tr);
5795 goto out;
5796 }
5797
5798 trace_seq_init(&iter->seq);
5799 iter->trace = tr->current_trace;
5800
5801 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5802 ret = -ENOMEM;
5803 goto fail;
5804 }
5805
5806 /* trace pipe does not show start of buffer */
5807 cpumask_setall(iter->started);
5808
5809 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5810 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5811
5812 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5813 if (trace_clocks[tr->clock_id].in_ns)
5814 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5815
5816 iter->tr = tr;
5817 iter->trace_buffer = &tr->trace_buffer;
5818 iter->cpu_file = tracing_get_cpu(inode);
5819 mutex_init(&iter->mutex);
5820 filp->private_data = iter;
5821
5822 if (iter->trace->pipe_open)
5823 iter->trace->pipe_open(iter);
5824
5825 nonseekable_open(inode, filp);
5826
5827 tr->current_trace->ref++;
5828 out:
5829 mutex_unlock(&trace_types_lock);
5830 return ret;
5831
5832 fail:
5833 kfree(iter);
5834 __trace_array_put(tr);
5835 mutex_unlock(&trace_types_lock);
5836 return ret;
5837 }
5838
5839 static int tracing_release_pipe(struct inode *inode, struct file *file)
5840 {
5841 struct trace_iterator *iter = file->private_data;
5842 struct trace_array *tr = inode->i_private;
5843
5844 mutex_lock(&trace_types_lock);
5845
5846 tr->current_trace->ref--;
5847
5848 if (iter->trace->pipe_close)
5849 iter->trace->pipe_close(iter);
5850
5851 mutex_unlock(&trace_types_lock);
5852
5853 free_cpumask_var(iter->started);
5854 mutex_destroy(&iter->mutex);
5855 kfree(iter);
5856
5857 trace_array_put(tr);
5858
5859 return 0;
5860 }
5861
5862 static __poll_t
5863 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5864 {
5865 struct trace_array *tr = iter->tr;
5866
5867 /* Iterators are static, they should be filled or empty */
5868 if (trace_buffer_iter(iter, iter->cpu_file))
5869 return EPOLLIN | EPOLLRDNORM;
5870
5871 if (tr->trace_flags & TRACE_ITER_BLOCK)
5872 /*
5873 * Always select as readable when in blocking mode
5874 */
5875 return EPOLLIN | EPOLLRDNORM;
5876 else
5877 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5878 filp, poll_table);
5879 }
5880
5881 static __poll_t
5882 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5883 {
5884 struct trace_iterator *iter = filp->private_data;
5885
5886 return trace_poll(iter, filp, poll_table);
5887 }
5888
5889 /* Must be called with iter->mutex held. */
5890 static int tracing_wait_pipe(struct file *filp)
5891 {
5892 struct trace_iterator *iter = filp->private_data;
5893 int ret;
5894
5895 while (trace_empty(iter)) {
5896
5897 if ((filp->f_flags & O_NONBLOCK)) {
5898 return -EAGAIN;
5899 }
5900
5901 /*
5902 * We block until we read something and tracing is disabled.
5903 * We still block if tracing is disabled, but we have never
5904 * read anything. This allows a user to cat this file, and
5905 * then enable tracing. But after we have read something,
5906 * we give an EOF when tracing is again disabled.
5907 *
5908 * iter->pos will be 0 if we haven't read anything.
5909 */
5910 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5911 break;
5912
5913 mutex_unlock(&iter->mutex);
5914
5915 ret = wait_on_pipe(iter, 0);
5916
5917 mutex_lock(&iter->mutex);
5918
5919 if (ret)
5920 return ret;
5921 }
5922
5923 return 1;
5924 }
5925
5926 /*
5927 * Consumer reader.
5928 */
5929 static ssize_t
5930 tracing_read_pipe(struct file *filp, char __user *ubuf,
5931 size_t cnt, loff_t *ppos)
5932 {
5933 struct trace_iterator *iter = filp->private_data;
5934 ssize_t sret;
5935
5936 /*
5937 * Avoid more than one consumer on a single file descriptor
5938 * This is just a matter of traces coherency, the ring buffer itself
5939 * is protected.
5940 */
5941 mutex_lock(&iter->mutex);
5942
5943 /* return any leftover data */
5944 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5945 if (sret != -EBUSY)
5946 goto out;
5947
5948 trace_seq_init(&iter->seq);
5949
5950 if (iter->trace->read) {
5951 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5952 if (sret)
5953 goto out;
5954 }
5955
5956 waitagain:
5957 sret = tracing_wait_pipe(filp);
5958 if (sret <= 0)
5959 goto out;
5960
5961 /* stop when tracing is finished */
5962 if (trace_empty(iter)) {
5963 sret = 0;
5964 goto out;
5965 }
5966
5967 if (cnt >= PAGE_SIZE)
5968 cnt = PAGE_SIZE - 1;
5969
5970 /* reset all but tr, trace, and overruns */
5971 memset(&iter->seq, 0,
5972 sizeof(struct trace_iterator) -
5973 offsetof(struct trace_iterator, seq));
5974 cpumask_clear(iter->started);
5975 iter->pos = -1;
5976
5977 trace_event_read_lock();
5978 trace_access_lock(iter->cpu_file);
5979 while (trace_find_next_entry_inc(iter) != NULL) {
5980 enum print_line_t ret;
5981 int save_len = iter->seq.seq.len;
5982
5983 ret = print_trace_line(iter);
5984 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5985 /* don't print partial lines */
5986 iter->seq.seq.len = save_len;
5987 break;
5988 }
5989 if (ret != TRACE_TYPE_NO_CONSUME)
5990 trace_consume(iter);
5991
5992 if (trace_seq_used(&iter->seq) >= cnt)
5993 break;
5994
5995 /*
5996 * Setting the full flag means we reached the trace_seq buffer
5997 * size and we should have left via the partial-output condition
5998 * above. One of the trace_seq_* functions is not being used properly.
5999 */
6000 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6001 iter->ent->type);
6002 }
6003 trace_access_unlock(iter->cpu_file);
6004 trace_event_read_unlock();
6005
6006 /* Now copy what we have to the user */
6007 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6008 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6009 trace_seq_init(&iter->seq);
6010
6011 /*
6012 * If there was nothing to send to the user, despite consuming trace
6013 * entries, go back and wait for more entries.
6014 */
6015 if (sret == -EBUSY)
6016 goto waitagain;
6017
6018 out:
6019 mutex_unlock(&iter->mutex);
6020
6021 return sret;
6022 }
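
/*
 * Rough usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 * trace_pipe is a consuming reader, so entries returned here are removed
 * from the ring buffer and will not appear in a later read of "trace".
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * The read blocks while the buffer is empty unless the file was opened
 * with O_NONBLOCK, and concurrent readers of the same file descriptor
 * are serialized by iter->mutex above.
 */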
6023
6024 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6025 unsigned int idx)
6026 {
6027 __free_page(spd->pages[idx]);
6028 }
6029
6030 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6031 .confirm = generic_pipe_buf_confirm,
6032 .release = generic_pipe_buf_release,
6033 .steal = generic_pipe_buf_steal,
6034 .get = generic_pipe_buf_get,
6035 };
6036
6037 static size_t
6038 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6039 {
6040 size_t count;
6041 int save_len;
6042 int ret;
6043
6044 /* Seq buffer is page-sized, exactly what we need. */
6045 for (;;) {
6046 save_len = iter->seq.seq.len;
6047 ret = print_trace_line(iter);
6048
6049 if (trace_seq_has_overflowed(&iter->seq)) {
6050 iter->seq.seq.len = save_len;
6051 break;
6052 }
6053
6054 /*
6055 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6056 * only be returned if iter->seq overflowed. But check it
6057 * anyway to be safe.
6058 */
6059 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6060 iter->seq.seq.len = save_len;
6061 break;
6062 }
6063
6064 count = trace_seq_used(&iter->seq) - save_len;
6065 if (rem < count) {
6066 rem = 0;
6067 iter->seq.seq.len = save_len;
6068 break;
6069 }
6070
6071 if (ret != TRACE_TYPE_NO_CONSUME)
6072 trace_consume(iter);
6073 rem -= count;
6074 if (!trace_find_next_entry_inc(iter)) {
6075 rem = 0;
6076 iter->ent = NULL;
6077 break;
6078 }
6079 }
6080
6081 return rem;
6082 }
6083
6084 static ssize_t tracing_splice_read_pipe(struct file *filp,
6085 loff_t *ppos,
6086 struct pipe_inode_info *pipe,
6087 size_t len,
6088 unsigned int flags)
6089 {
6090 struct page *pages_def[PIPE_DEF_BUFFERS];
6091 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6092 struct trace_iterator *iter = filp->private_data;
6093 struct splice_pipe_desc spd = {
6094 .pages = pages_def,
6095 .partial = partial_def,
6096 .nr_pages = 0, /* This gets updated below. */
6097 .nr_pages_max = PIPE_DEF_BUFFERS,
6098 .ops = &tracing_pipe_buf_ops,
6099 .spd_release = tracing_spd_release_pipe,
6100 };
6101 ssize_t ret;
6102 size_t rem;
6103 unsigned int i;
6104
6105 if (splice_grow_spd(pipe, &spd))
6106 return -ENOMEM;
6107
6108 mutex_lock(&iter->mutex);
6109
6110 if (iter->trace->splice_read) {
6111 ret = iter->trace->splice_read(iter, filp,
6112 ppos, pipe, len, flags);
6113 if (ret)
6114 goto out_err;
6115 }
6116
6117 ret = tracing_wait_pipe(filp);
6118 if (ret <= 0)
6119 goto out_err;
6120
6121 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6122 ret = -EFAULT;
6123 goto out_err;
6124 }
6125
6126 trace_event_read_lock();
6127 trace_access_lock(iter->cpu_file);
6128
6129 /* Fill as many pages as possible. */
6130 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6131 spd.pages[i] = alloc_page(GFP_KERNEL);
6132 if (!spd.pages[i])
6133 break;
6134
6135 rem = tracing_fill_pipe_page(rem, iter);
6136
6137 /* Copy the data into the page, so we can start over. */
6138 ret = trace_seq_to_buffer(&iter->seq,
6139 page_address(spd.pages[i]),
6140 trace_seq_used(&iter->seq));
6141 if (ret < 0) {
6142 __free_page(spd.pages[i]);
6143 break;
6144 }
6145 spd.partial[i].offset = 0;
6146 spd.partial[i].len = trace_seq_used(&iter->seq);
6147
6148 trace_seq_init(&iter->seq);
6149 }
6150
6151 trace_access_unlock(iter->cpu_file);
6152 trace_event_read_unlock();
6153 mutex_unlock(&iter->mutex);
6154
6155 spd.nr_pages = i;
6156
6157 if (i)
6158 ret = splice_to_pipe(pipe, &spd);
6159 else
6160 ret = 0;
6161 out:
6162 splice_shrink_spd(&spd);
6163 return ret;
6164
6165 out_err:
6166 mutex_unlock(&iter->mutex);
6167 goto out;
6168 }
6169
6170 static ssize_t
6171 tracing_entries_read(struct file *filp, char __user *ubuf,
6172 size_t cnt, loff_t *ppos)
6173 {
6174 struct inode *inode = file_inode(filp);
6175 struct trace_array *tr = inode->i_private;
6176 int cpu = tracing_get_cpu(inode);
6177 char buf[64];
6178 int r = 0;
6179 ssize_t ret;
6180
6181 mutex_lock(&trace_types_lock);
6182
6183 if (cpu == RING_BUFFER_ALL_CPUS) {
6184 int cpu, buf_size_same;
6185 unsigned long size;
6186
6187 size = 0;
6188 buf_size_same = 1;
6189 /* Check if all per-CPU buffer sizes are the same */
6190 for_each_tracing_cpu(cpu) {
6191 /* fill in the size from first enabled cpu */
6192 if (size == 0)
6193 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6194 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6195 buf_size_same = 0;
6196 break;
6197 }
6198 }
6199
6200 if (buf_size_same) {
6201 if (!ring_buffer_expanded)
6202 r = sprintf(buf, "%lu (expanded: %lu)\n",
6203 size >> 10,
6204 trace_buf_size >> 10);
6205 else
6206 r = sprintf(buf, "%lu\n", size >> 10);
6207 } else
6208 r = sprintf(buf, "X\n");
6209 } else
6210 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6211
6212 mutex_unlock(&trace_types_lock);
6213
6214 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6215 return ret;
6216 }
6217
6218 static ssize_t
6219 tracing_entries_write(struct file *filp, const char __user *ubuf,
6220 size_t cnt, loff_t *ppos)
6221 {
6222 struct inode *inode = file_inode(filp);
6223 struct trace_array *tr = inode->i_private;
6224 unsigned long val;
6225 int ret;
6226
6227 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6228 if (ret)
6229 return ret;
6230
6231 /* must have at least 1 entry */
6232 if (!val)
6233 return -EINVAL;
6234
6235 /* value is in KB */
6236 val <<= 10;
6237 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6238 if (ret < 0)
6239 return ret;
6240
6241 *ppos += cnt;
6242
6243 return cnt;
6244 }
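
/*
 * Rough usage sketch: the value written to buffer_size_kb is in
 * kilobytes and applies to each per-CPU buffer; the per_cpu/cpuN
 * variant of this file resizes only that CPU's buffer. For example
 * (path assumes tracefs at /sys/kernel/tracing):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * would ask for roughly 4 MB per CPU via tracing_resize_ring_buffer().
 */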
6245
6246 static ssize_t
6247 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6248 size_t cnt, loff_t *ppos)
6249 {
6250 struct trace_array *tr = filp->private_data;
6251 char buf[64];
6252 int r, cpu;
6253 unsigned long size = 0, expanded_size = 0;
6254
6255 mutex_lock(&trace_types_lock);
6256 for_each_tracing_cpu(cpu) {
6257 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6258 if (!ring_buffer_expanded)
6259 expanded_size += trace_buf_size >> 10;
6260 }
6261 if (ring_buffer_expanded)
6262 r = sprintf(buf, "%lu\n", size);
6263 else
6264 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6265 mutex_unlock(&trace_types_lock);
6266
6267 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6268 }
6269
6270 static ssize_t
6271 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6272 size_t cnt, loff_t *ppos)
6273 {
6274 /*
6275 * There is no need to read what the user has written; this function
6276 * only exists so that "echo" into this file does not return an error.
6277 */
6278
6279 *ppos += cnt;
6280
6281 return cnt;
6282 }
6283
6284 static int
6285 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6286 {
6287 struct trace_array *tr = inode->i_private;
6288
6289 /* Disable tracing? */
6290 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6291 tracer_tracing_off(tr);
6292 /* resize the ring buffer to 0 */
6293 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6294
6295 trace_array_put(tr);
6296
6297 return 0;
6298 }
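
/*
 * Sketch of how free_buffer behaves, as implemented above: any write is
 * accepted and ignored, and when the file is closed the ring buffer is
 * resized to zero. If the TRACE_ITER_STOP_ON_FREE option is set,
 * tracing is also turned off first. E.g. (path is an assumption):
 *
 *	# echo > /sys/kernel/tracing/free_buffer
 */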
6299
6300 static ssize_t
6301 tracing_mark_write(struct file *filp, const char __user *ubuf,
6302 size_t cnt, loff_t *fpos)
6303 {
6304 struct trace_array *tr = filp->private_data;
6305 struct ring_buffer_event *event;
6306 enum event_trigger_type tt = ETT_NONE;
6307 struct ring_buffer *buffer;
6308 struct print_entry *entry;
6309 unsigned long irq_flags;
6310 const char faulted[] = "<faulted>";
6311 ssize_t written;
6312 int size;
6313 int len;
6314
6315 /* Used in tracing_mark_raw_write() as well */
6316 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6317
6318 if (tracing_disabled)
6319 return -EINVAL;
6320
6321 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6322 return -EINVAL;
6323
6324 if (cnt > TRACE_BUF_SIZE)
6325 cnt = TRACE_BUF_SIZE;
6326
6327 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6328
6329 local_save_flags(irq_flags);
6330 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6331
6332 /* If less than "<faulted>", then make sure we can still add that */
6333 if (cnt < FAULTED_SIZE)
6334 size += FAULTED_SIZE - cnt;
6335
6336 buffer = tr->trace_buffer.buffer;
6337 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6338 irq_flags, preempt_count());
6339 if (unlikely(!event))
6340 /* Ring buffer disabled, return as if not open for write */
6341 return -EBADF;
6342
6343 entry = ring_buffer_event_data(event);
6344 entry->ip = _THIS_IP_;
6345
6346 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6347 if (len) {
6348 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6349 cnt = FAULTED_SIZE;
6350 written = -EFAULT;
6351 } else
6352 written = cnt;
6353 len = cnt;
6354
6355 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6356 /* do not add \n before testing triggers, but add \0 */
6357 entry->buf[cnt] = '\0';
6358 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6359 }
6360
6361 if (entry->buf[cnt - 1] != '\n') {
6362 entry->buf[cnt] = '\n';
6363 entry->buf[cnt + 1] = '\0';
6364 } else
6365 entry->buf[cnt] = '\0';
6366
6367 __buffer_unlock_commit(buffer, event);
6368
6369 if (tt)
6370 event_triggers_post_call(tr->trace_marker_file, tt);
6371
6372 if (written > 0)
6373 *fpos += written;
6374
6375 return written;
6376 }
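
/*
 * Rough usage sketch (path assumes tracefs at /sys/kernel/tracing):
 * strings written to trace_marker show up as print events interleaved
 * with the rest of the trace, with a newline appended if missing:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * Writes larger than TRACE_BUF_SIZE are truncated, and a faulting copy
 * from user space is recorded as "<faulted>".
 */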
6377
6378 /* Limit it for now to 3K (including tag) */
6379 #define RAW_DATA_MAX_SIZE (1024*3)
6380
6381 static ssize_t
6382 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6383 size_t cnt, loff_t *fpos)
6384 {
6385 struct trace_array *tr = filp->private_data;
6386 struct ring_buffer_event *event;
6387 struct ring_buffer *buffer;
6388 struct raw_data_entry *entry;
6389 const char faulted[] = "<faulted>";
6390 unsigned long irq_flags;
6391 ssize_t written;
6392 int size;
6393 int len;
6394
6395 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6396
6397 if (tracing_disabled)
6398 return -EINVAL;
6399
6400 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6401 return -EINVAL;
6402
6403 /* The marker must at least have a tag id */
6404 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6405 return -EINVAL;
6406
6407 if (cnt > TRACE_BUF_SIZE)
6408 cnt = TRACE_BUF_SIZE;
6409
6410 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6411
6412 local_save_flags(irq_flags);
6413 size = sizeof(*entry) + cnt;
6414 if (cnt < FAULT_SIZE_ID)
6415 size += FAULT_SIZE_ID - cnt;
6416
6417 buffer = tr->trace_buffer.buffer;
6418 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6419 irq_flags, preempt_count());
6420 if (!event)
6421 /* Ring buffer disabled, return as if not open for write */
6422 return -EBADF;
6423
6424 entry = ring_buffer_event_data(event);
6425
6426 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6427 if (len) {
6428 entry->id = -1;
6429 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6430 written = -EFAULT;
6431 } else
6432 written = cnt;
6433
6434 __buffer_unlock_commit(buffer, event);
6435
6436 if (written > 0)
6437 *fpos += written;
6438
6439 return written;
6440 }
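
/*
 * Rough user-space sketch of the trace_marker_raw format handled above:
 * the first sizeof(int) bytes are taken as a tag id and the remainder is
 * opaque payload, up to RAW_DATA_MAX_SIZE in total. The path and tag
 * value below are only illustrative:
 *
 *	struct { int id; char payload[8]; } rec = { 42, "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &rec, sizeof(rec));
 *	close(fd);
 */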
6441
6442 static int tracing_clock_show(struct seq_file *m, void *v)
6443 {
6444 struct trace_array *tr = m->private;
6445 int i;
6446
6447 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6448 seq_printf(m,
6449 "%s%s%s%s", i ? " " : "",
6450 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6451 i == tr->clock_id ? "]" : "");
6452 seq_putc(m, '\n');
6453
6454 return 0;
6455 }
6456
6457 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6458 {
6459 int i;
6460
6461 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6462 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6463 break;
6464 }
6465 if (i == ARRAY_SIZE(trace_clocks))
6466 return -EINVAL;
6467
6468 mutex_lock(&trace_types_lock);
6469
6470 tr->clock_id = i;
6471
6472 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6473
6474 /*
6475 * New clock may not be consistent with the previous clock.
6476 * Reset the buffer so that it doesn't have incomparable timestamps.
6477 */
6478 tracing_reset_online_cpus(&tr->trace_buffer);
6479
6480 #ifdef CONFIG_TRACER_MAX_TRACE
6481 if (tr->max_buffer.buffer)
6482 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6483 tracing_reset_online_cpus(&tr->max_buffer);
6484 #endif
6485
6486 mutex_unlock(&trace_types_lock);
6487
6488 return 0;
6489 }
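
/*
 * Rough usage sketch: reading trace_clock lists the available clocks
 * with the current one in brackets; writing one of those names switches
 * to it and, as above, resets the buffers so timestamps stay comparable:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter ...
 *	# echo global > /sys/kernel/tracing/trace_clock
 *
 * (Paths and the exact clock list are assumptions; see trace_clocks[].)
 */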
6490
6491 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6492 size_t cnt, loff_t *fpos)
6493 {
6494 struct seq_file *m = filp->private_data;
6495 struct trace_array *tr = m->private;
6496 char buf[64];
6497 const char *clockstr;
6498 int ret;
6499
6500 if (cnt >= sizeof(buf))
6501 return -EINVAL;
6502
6503 if (copy_from_user(buf, ubuf, cnt))
6504 return -EFAULT;
6505
6506 buf[cnt] = 0;
6507
6508 clockstr = strstrip(buf);
6509
6510 ret = tracing_set_clock(tr, clockstr);
6511 if (ret)
6512 return ret;
6513
6514 *fpos += cnt;
6515
6516 return cnt;
6517 }
6518
6519 static int tracing_clock_open(struct inode *inode, struct file *file)
6520 {
6521 struct trace_array *tr = inode->i_private;
6522 int ret;
6523
6524 if (tracing_disabled)
6525 return -ENODEV;
6526
6527 if (trace_array_get(tr))
6528 return -ENODEV;
6529
6530 ret = single_open(file, tracing_clock_show, inode->i_private);
6531 if (ret < 0)
6532 trace_array_put(tr);
6533
6534 return ret;
6535 }
6536
6537 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6538 {
6539 struct trace_array *tr = m->private;
6540
6541 mutex_lock(&trace_types_lock);
6542
6543 if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6544 seq_puts(m, "delta [absolute]\n");
6545 else
6546 seq_puts(m, "[delta] absolute\n");
6547
6548 mutex_unlock(&trace_types_lock);
6549
6550 return 0;
6551 }
6552
6553 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6554 {
6555 struct trace_array *tr = inode->i_private;
6556 int ret;
6557
6558 if (tracing_disabled)
6559 return -ENODEV;
6560
6561 if (trace_array_get(tr))
6562 return -ENODEV;
6563
6564 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6565 if (ret < 0)
6566 trace_array_put(tr);
6567
6568 return ret;
6569 }
6570
6571 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6572 {
6573 int ret = 0;
6574
6575 mutex_lock(&trace_types_lock);
6576
6577 if (abs && tr->time_stamp_abs_ref++)
6578 goto out;
6579
6580 if (!abs) {
6581 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6582 ret = -EINVAL;
6583 goto out;
6584 }
6585
6586 if (--tr->time_stamp_abs_ref)
6587 goto out;
6588 }
6589
6590 ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6591
6592 #ifdef CONFIG_TRACER_MAX_TRACE
6593 if (tr->max_buffer.buffer)
6594 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6595 #endif
6596 out:
6597 mutex_unlock(&trace_types_lock);
6598
6599 return ret;
6600 }
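
/*
 * Note on the refcounting above: tracing_set_time_stamp_abs(tr, true)
 * only flips the ring buffer to absolute timestamps on the first
 * request, and tracing_set_time_stamp_abs(tr, false) only flips it back
 * once the last user is gone, so callers are expected to pair the two.
 */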
6601
6602 struct ftrace_buffer_info {
6603 struct trace_iterator iter;
6604 void *spare;
6605 unsigned int spare_cpu;
6606 unsigned int read;
6607 };
6608
6609 #ifdef CONFIG_TRACER_SNAPSHOT
6610 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6611 {
6612 struct trace_array *tr = inode->i_private;
6613 struct trace_iterator *iter;
6614 struct seq_file *m;
6615 int ret = 0;
6616
6617 if (trace_array_get(tr) < 0)
6618 return -ENODEV;
6619
6620 if (file->f_mode & FMODE_READ) {
6621 iter = __tracing_open(inode, file, true);
6622 if (IS_ERR(iter))
6623 ret = PTR_ERR(iter);
6624 } else {
6625 /* Writes still need the seq_file to hold the private data */
6626 ret = -ENOMEM;
6627 m = kzalloc(sizeof(*m), GFP_KERNEL);
6628 if (!m)
6629 goto out;
6630 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6631 if (!iter) {
6632 kfree(m);
6633 goto out;
6634 }
6635 ret = 0;
6636
6637 iter->tr = tr;
6638 iter->trace_buffer = &tr->max_buffer;
6639 iter->cpu_file = tracing_get_cpu(inode);
6640 m->private = iter;
6641 file->private_data = m;
6642 }
6643 out:
6644 if (ret < 0)
6645 trace_array_put(tr);
6646
6647 return ret;
6648 }
6649
6650 static ssize_t
6651 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6652 loff_t *ppos)
6653 {
6654 struct seq_file *m = filp->private_data;
6655 struct trace_iterator *iter = m->private;
6656 struct trace_array *tr = iter->tr;
6657 unsigned long val;
6658 int ret;
6659
6660 ret = tracing_update_buffers();
6661 if (ret < 0)
6662 return ret;
6663
6664 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6665 if (ret)
6666 return ret;
6667
6668 mutex_lock(&trace_types_lock);
6669
6670 if (tr->current_trace->use_max_tr) {
6671 ret = -EBUSY;
6672 goto out;
6673 }
6674
6675 arch_spin_lock(&tr->max_lock);
6676 if (tr->cond_snapshot)
6677 ret = -EBUSY;
6678 arch_spin_unlock(&tr->max_lock);
6679 if (ret)
6680 goto out;
6681
6682 switch (val) {
6683 case 0:
6684 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6685 ret = -EINVAL;
6686 break;
6687 }
6688 if (tr->allocated_snapshot)
6689 free_snapshot(tr);
6690 break;
6691 case 1:
6692 /* Only allow per-cpu swap if the ring buffer supports it */
6693 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6694 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6695 ret = -EINVAL;
6696 break;
6697 }
6698 #endif
6699 if (!tr->allocated_snapshot) {
6700 ret = tracing_alloc_snapshot_instance(tr);
6701 if (ret < 0)
6702 break;
6703 }
6704 local_irq_disable();
6705 /* Now, we're going to swap */
6706 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6707 update_max_tr(tr, current, smp_processor_id(), NULL);
6708 else
6709 update_max_tr_single(tr, current, iter->cpu_file);
6710 local_irq_enable();
6711 break;
6712 default:
6713 if (tr->allocated_snapshot) {
6714 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6715 tracing_reset_online_cpus(&tr->max_buffer);
6716 else
6717 tracing_reset(&tr->max_buffer, iter->cpu_file);
6718 }
6719 break;
6720 }
6721
6722 if (ret >= 0) {
6723 *ppos += cnt;
6724 ret = cnt;
6725 }
6726 out:
6727 mutex_unlock(&trace_types_lock);
6728 return ret;
6729 }
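
/*
 * Sketch of the snapshot file semantics implemented above (path assumes
 * tracefs at /sys/kernel/tracing):
 *
 *	# echo 0 > snapshot    : free the snapshot buffer (top-level file only)
 *	# echo 1 > snapshot    : allocate it if needed and swap it with the
 *	                         live buffer
 *	# echo 2 > snapshot    : (any other value) clear the snapshot buffer
 *	                         without freeing it
 *
 * Writes fail with -EBUSY while the current tracer or a conditional
 * snapshot is already using the max buffer.
 */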
6730
6731 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6732 {
6733 struct seq_file *m = file->private_data;
6734 int ret;
6735
6736 ret = tracing_release(inode, file);
6737
6738 if (file->f_mode & FMODE_READ)
6739 return ret;
6740
6741 /* If write only, the seq_file is just a stub */
6742 if (m)
6743 kfree(m->private);
6744 kfree(m);
6745
6746 return 0;
6747 }
6748
6749 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6750 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6751 size_t count, loff_t *ppos);
6752 static int tracing_buffers_release(struct inode *inode, struct file *file);
6753 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6754 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6755
6756 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6757 {
6758 struct ftrace_buffer_info *info;
6759 int ret;
6760
6761 ret = tracing_buffers_open(inode, filp);
6762 if (ret < 0)
6763 return ret;
6764
6765 info = filp->private_data;
6766
6767 if (info->iter.trace->use_max_tr) {
6768 tracing_buffers_release(inode, filp);
6769 return -EBUSY;
6770 }
6771
6772 info->iter.snapshot = true;
6773 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6774
6775 return ret;
6776 }
6777
6778 #endif /* CONFIG_TRACER_SNAPSHOT */
6779
6780
6781 static const struct file_operations tracing_thresh_fops = {
6782 .open = tracing_open_generic,
6783 .read = tracing_thresh_read,
6784 .write = tracing_thresh_write,
6785 .llseek = generic_file_llseek,
6786 };
6787
6788 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6789 static const struct file_operations tracing_max_lat_fops = {
6790 .open = tracing_open_generic,
6791 .read = tracing_max_lat_read,
6792 .write = tracing_max_lat_write,
6793 .llseek = generic_file_llseek,
6794 };
6795 #endif
6796
6797 static const struct file_operations set_tracer_fops = {
6798 .open = tracing_open_generic,
6799 .read = tracing_set_trace_read,
6800 .write = tracing_set_trace_write,
6801 .llseek = generic_file_llseek,
6802 };
6803
6804 static const struct file_operations tracing_pipe_fops = {
6805 .open = tracing_open_pipe,
6806 .poll = tracing_poll_pipe,
6807 .read = tracing_read_pipe,
6808 .splice_read = tracing_splice_read_pipe,
6809 .release = tracing_release_pipe,
6810 .llseek = no_llseek,
6811 };
6812
6813 static const struct file_operations tracing_entries_fops = {
6814 .open = tracing_open_generic_tr,
6815 .read = tracing_entries_read,
6816 .write = tracing_entries_write,
6817 .llseek = generic_file_llseek,
6818 .release = tracing_release_generic_tr,
6819 };
6820
6821 static const struct file_operations tracing_total_entries_fops = {
6822 .open = tracing_open_generic_tr,
6823 .read = tracing_total_entries_read,
6824 .llseek = generic_file_llseek,
6825 .release = tracing_release_generic_tr,
6826 };
6827
6828 static const struct file_operations tracing_free_buffer_fops = {
6829 .open = tracing_open_generic_tr,
6830 .write = tracing_free_buffer_write,
6831 .release = tracing_free_buffer_release,
6832 };
6833
6834 static const struct file_operations tracing_mark_fops = {
6835 .open = tracing_open_generic_tr,
6836 .write = tracing_mark_write,
6837 .llseek = generic_file_llseek,
6838 .release = tracing_release_generic_tr,
6839 };
6840
6841 static const struct file_operations tracing_mark_raw_fops = {
6842 .open = tracing_open_generic_tr,
6843 .write = tracing_mark_raw_write,
6844 .llseek = generic_file_llseek,
6845 .release = tracing_release_generic_tr,
6846 };
6847
6848 static const struct file_operations trace_clock_fops = {
6849 .open = tracing_clock_open,
6850 .read = seq_read,
6851 .llseek = seq_lseek,
6852 .release = tracing_single_release_tr,
6853 .write = tracing_clock_write,
6854 };
6855
6856 static const struct file_operations trace_time_stamp_mode_fops = {
6857 .open = tracing_time_stamp_mode_open,
6858 .read = seq_read,
6859 .llseek = seq_lseek,
6860 .release = tracing_single_release_tr,
6861 };
6862
6863 #ifdef CONFIG_TRACER_SNAPSHOT
6864 static const struct file_operations snapshot_fops = {
6865 .open = tracing_snapshot_open,
6866 .read = seq_read,
6867 .write = tracing_snapshot_write,
6868 .llseek = tracing_lseek,
6869 .release = tracing_snapshot_release,
6870 };
6871
6872 static const struct file_operations snapshot_raw_fops = {
6873 .open = snapshot_raw_open,
6874 .read = tracing_buffers_read,
6875 .release = tracing_buffers_release,
6876 .splice_read = tracing_buffers_splice_read,
6877 .llseek = no_llseek,
6878 };
6879
6880 #endif /* CONFIG_TRACER_SNAPSHOT */
6881
6882 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6883 {
6884 struct trace_array *tr = inode->i_private;
6885 struct ftrace_buffer_info *info;
6886 int ret;
6887
6888 if (tracing_disabled)
6889 return -ENODEV;
6890
6891 if (trace_array_get(tr) < 0)
6892 return -ENODEV;
6893
6894 info = kzalloc(sizeof(*info), GFP_KERNEL);
6895 if (!info) {
6896 trace_array_put(tr);
6897 return -ENOMEM;
6898 }
6899
6900 mutex_lock(&trace_types_lock);
6901
6902 info->iter.tr = tr;
6903 info->iter.cpu_file = tracing_get_cpu(inode);
6904 info->iter.trace = tr->current_trace;
6905 info->iter.trace_buffer = &tr->trace_buffer;
6906 info->spare = NULL;
6907 /* Force reading ring buffer for first read */
6908 info->read = (unsigned int)-1;
6909
6910 filp->private_data = info;
6911
6912 tr->current_trace->ref++;
6913
6914 mutex_unlock(&trace_types_lock);
6915
6916 ret = nonseekable_open(inode, filp);
6917 if (ret < 0)
6918 trace_array_put(tr);
6919
6920 return ret;
6921 }
6922
6923 static __poll_t
6924 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6925 {
6926 struct ftrace_buffer_info *info = filp->private_data;
6927 struct trace_iterator *iter = &info->iter;
6928
6929 return trace_poll(iter, filp, poll_table);
6930 }
6931
6932 static ssize_t
6933 tracing_buffers_read(struct file *filp, char __user *ubuf,
6934 size_t count, loff_t *ppos)
6935 {
6936 struct ftrace_buffer_info *info = filp->private_data;
6937 struct trace_iterator *iter = &info->iter;
6938 ssize_t ret = 0;
6939 ssize_t size;
6940
6941 if (!count)
6942 return 0;
6943
6944 #ifdef CONFIG_TRACER_MAX_TRACE
6945 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6946 return -EBUSY;
6947 #endif
6948
6949 if (!info->spare) {
6950 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6951 iter->cpu_file);
6952 if (IS_ERR(info->spare)) {
6953 ret = PTR_ERR(info->spare);
6954 info->spare = NULL;
6955 } else {
6956 info->spare_cpu = iter->cpu_file;
6957 }
6958 }
6959 if (!info->spare)
6960 return ret;
6961
6962 /* Do we have previous read data to read? */
6963 if (info->read < PAGE_SIZE)
6964 goto read;
6965
6966 again:
6967 trace_access_lock(iter->cpu_file);
6968 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6969 &info->spare,
6970 count,
6971 iter->cpu_file, 0);
6972 trace_access_unlock(iter->cpu_file);
6973
6974 if (ret < 0) {
6975 if (trace_empty(iter)) {
6976 if ((filp->f_flags & O_NONBLOCK))
6977 return -EAGAIN;
6978
6979 ret = wait_on_pipe(iter, 0);
6980 if (ret)
6981 return ret;
6982
6983 goto again;
6984 }
6985 return 0;
6986 }
6987
6988 info->read = 0;
6989 read:
6990 size = PAGE_SIZE - info->read;
6991 if (size > count)
6992 size = count;
6993
6994 ret = copy_to_user(ubuf, info->spare + info->read, size);
6995 if (ret == size)
6996 return -EFAULT;
6997
6998 size -= ret;
6999
7000 *ppos += size;
7001 info->read += size;
7002
7003 return size;
7004 }
7005
7006 static int tracing_buffers_release(struct inode *inode, struct file *file)
7007 {
7008 struct ftrace_buffer_info *info = file->private_data;
7009 struct trace_iterator *iter = &info->iter;
7010
7011 mutex_lock(&trace_types_lock);
7012
7013 iter->tr->current_trace->ref--;
7014
7015 __trace_array_put(iter->tr);
7016
7017 if (info->spare)
7018 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7019 info->spare_cpu, info->spare);
7020 kfree(info);
7021
7022 mutex_unlock(&trace_types_lock);
7023
7024 return 0;
7025 }
7026
7027 struct buffer_ref {
7028 struct ring_buffer *buffer;
7029 void *page;
7030 int cpu;
7031 refcount_t refcount;
7032 };
7033
7034 static void buffer_ref_release(struct buffer_ref *ref)
7035 {
7036 if (!refcount_dec_and_test(&ref->refcount))
7037 return;
7038 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7039 kfree(ref);
7040 }
7041
7042 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7043 struct pipe_buffer *buf)
7044 {
7045 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7046
7047 buffer_ref_release(ref);
7048 buf->private = 0;
7049 }
7050
7051 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7052 struct pipe_buffer *buf)
7053 {
7054 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7055
7056 refcount_inc(&ref->refcount);
7057 }
7058
7059 /* Pipe buffer operations for a buffer. */
7060 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7061 .confirm = generic_pipe_buf_confirm,
7062 .release = buffer_pipe_buf_release,
7063 .steal = generic_pipe_buf_nosteal,
7064 .get = buffer_pipe_buf_get,
7065 };
7066
7067 /*
7068 * Callback from splice_to_pipe(); releases any remaining pages in
7069 * the spd in case we errored out while filling the pipe.
7070 */
7071 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7072 {
7073 struct buffer_ref *ref =
7074 (struct buffer_ref *)spd->partial[i].private;
7075
7076 buffer_ref_release(ref);
7077 spd->partial[i].private = 0;
7078 }
7079
7080 static ssize_t
7081 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7082 struct pipe_inode_info *pipe, size_t len,
7083 unsigned int flags)
7084 {
7085 struct ftrace_buffer_info *info = file->private_data;
7086 struct trace_iterator *iter = &info->iter;
7087 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7088 struct page *pages_def[PIPE_DEF_BUFFERS];
7089 struct splice_pipe_desc spd = {
7090 .pages = pages_def,
7091 .partial = partial_def,
7092 .nr_pages_max = PIPE_DEF_BUFFERS,
7093 .ops = &buffer_pipe_buf_ops,
7094 .spd_release = buffer_spd_release,
7095 };
7096 struct buffer_ref *ref;
7097 int entries, i;
7098 ssize_t ret = 0;
7099
7100 #ifdef CONFIG_TRACER_MAX_TRACE
7101 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7102 return -EBUSY;
7103 #endif
7104
7105 if (*ppos & (PAGE_SIZE - 1))
7106 return -EINVAL;
7107
7108 if (len & (PAGE_SIZE - 1)) {
7109 if (len < PAGE_SIZE)
7110 return -EINVAL;
7111 len &= PAGE_MASK;
7112 }
7113
7114 if (splice_grow_spd(pipe, &spd))
7115 return -ENOMEM;
7116
7117 again:
7118 trace_access_lock(iter->cpu_file);
7119 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7120
7121 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7122 struct page *page;
7123 int r;
7124
7125 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7126 if (!ref) {
7127 ret = -ENOMEM;
7128 break;
7129 }
7130
7131 refcount_set(&ref->refcount, 1);
7132 ref->buffer = iter->trace_buffer->buffer;
7133 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7134 if (IS_ERR(ref->page)) {
7135 ret = PTR_ERR(ref->page);
7136 ref->page = NULL;
7137 kfree(ref);
7138 break;
7139 }
7140 ref->cpu = iter->cpu_file;
7141
7142 r = ring_buffer_read_page(ref->buffer, &ref->page,
7143 len, iter->cpu_file, 1);
7144 if (r < 0) {
7145 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7146 ref->page);
7147 kfree(ref);
7148 break;
7149 }
7150
7151 page = virt_to_page(ref->page);
7152
7153 spd.pages[i] = page;
7154 spd.partial[i].len = PAGE_SIZE;
7155 spd.partial[i].offset = 0;
7156 spd.partial[i].private = (unsigned long)ref;
7157 spd.nr_pages++;
7158 *ppos += PAGE_SIZE;
7159
7160 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7161 }
7162
7163 trace_access_unlock(iter->cpu_file);
7164 spd.nr_pages = i;
7165
7166 /* did we read anything? */
7167 if (!spd.nr_pages) {
7168 if (ret)
7169 goto out;
7170
7171 ret = -EAGAIN;
7172 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7173 goto out;
7174
7175 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7176 if (ret)
7177 goto out;
7178
7179 goto again;
7180 }
7181
7182 ret = splice_to_pipe(pipe, &spd);
7183 out:
7184 splice_shrink_spd(&spd);
7185
7186 return ret;
7187 }
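
/*
 * As enforced above, splicing from trace_pipe_raw works in whole ring
 * buffer pages: *ppos must be page aligned and len is rounded down to a
 * multiple of PAGE_SIZE (anything smaller than a page is rejected).
 * Without O_NONBLOCK or SPLICE_F_NONBLOCK the call waits until the
 * buffer reaches tr->buffer_percent before retrying.
 */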
7188
7189 static const struct file_operations tracing_buffers_fops = {
7190 .open = tracing_buffers_open,
7191 .read = tracing_buffers_read,
7192 .poll = tracing_buffers_poll,
7193 .release = tracing_buffers_release,
7194 .splice_read = tracing_buffers_splice_read,
7195 .llseek = no_llseek,
7196 };
7197
7198 static ssize_t
7199 tracing_stats_read(struct file *filp, char __user *ubuf,
7200 size_t count, loff_t *ppos)
7201 {
7202 struct inode *inode = file_inode(filp);
7203 struct trace_array *tr = inode->i_private;
7204 struct trace_buffer *trace_buf = &tr->trace_buffer;
7205 int cpu = tracing_get_cpu(inode);
7206 struct trace_seq *s;
7207 unsigned long cnt;
7208 unsigned long long t;
7209 unsigned long usec_rem;
7210
7211 s = kmalloc(sizeof(*s), GFP_KERNEL);
7212 if (!s)
7213 return -ENOMEM;
7214
7215 trace_seq_init(s);
7216
7217 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7218 trace_seq_printf(s, "entries: %ld\n", cnt);
7219
7220 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7221 trace_seq_printf(s, "overrun: %ld\n", cnt);
7222
7223 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7224 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7225
7226 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7227 trace_seq_printf(s, "bytes: %ld\n", cnt);
7228
7229 if (trace_clocks[tr->clock_id].in_ns) {
7230 /* local or global for trace_clock */
7231 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7232 usec_rem = do_div(t, USEC_PER_SEC);
7233 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7234 t, usec_rem);
7235
7236 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7237 usec_rem = do_div(t, USEC_PER_SEC);
7238 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7239 } else {
7240 /* counter or tsc mode for trace_clock */
7241 trace_seq_printf(s, "oldest event ts: %llu\n",
7242 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7243
7244 trace_seq_printf(s, "now ts: %llu\n",
7245 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7246 }
7247
7248 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7249 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7250
7251 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7252 trace_seq_printf(s, "read events: %ld\n", cnt);
7253
7254 count = simple_read_from_buffer(ubuf, count, ppos,
7255 s->buffer, trace_seq_used(s));
7256
7257 kfree(s);
7258
7259 return count;
7260 }
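
/*
 * Sample of the per_cpu/cpuN/stats output produced above (values are
 * made up for illustration):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 5616
 *	oldest event ts: 2301.756631
 *	now ts: 2323.421342
 *	dropped events: 0
 *	read events: 107
 */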
7261
7262 static const struct file_operations tracing_stats_fops = {
7263 .open = tracing_open_generic_tr,
7264 .read = tracing_stats_read,
7265 .llseek = generic_file_llseek,
7266 .release = tracing_release_generic_tr,
7267 };
7268
7269 #ifdef CONFIG_DYNAMIC_FTRACE
7270
7271 static ssize_t
7272 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7273 size_t cnt, loff_t *ppos)
7274 {
7275 unsigned long *p = filp->private_data;
7276 char buf[64]; /* Not too big for a shallow stack */
7277 int r;
7278
7279 r = scnprintf(buf, 63, "%ld", *p);
7280 buf[r++] = '\n';
7281
7282 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7283 }
7284
7285 static const struct file_operations tracing_dyn_info_fops = {
7286 .open = tracing_open_generic,
7287 .read = tracing_read_dyn_info,
7288 .llseek = generic_file_llseek,
7289 };
7290 #endif /* CONFIG_DYNAMIC_FTRACE */
7291
7292 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7293 static void
7294 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7295 struct trace_array *tr, struct ftrace_probe_ops *ops,
7296 void *data)
7297 {
7298 tracing_snapshot_instance(tr);
7299 }
7300
7301 static void
7302 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7303 struct trace_array *tr, struct ftrace_probe_ops *ops,
7304 void *data)
7305 {
7306 struct ftrace_func_mapper *mapper = data;
7307 long *count = NULL;
7308
7309 if (mapper)
7310 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7311
7312 if (count) {
7313
7314 if (*count <= 0)
7315 return;
7316
7317 (*count)--;
7318 }
7319
7320 tracing_snapshot_instance(tr);
7321 }
7322
7323 static int
7324 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7325 struct ftrace_probe_ops *ops, void *data)
7326 {
7327 struct ftrace_func_mapper *mapper = data;
7328 long *count = NULL;
7329
7330 seq_printf(m, "%ps:", (void *)ip);
7331
7332 seq_puts(m, "snapshot");
7333
7334 if (mapper)
7335 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7336
7337 if (count)
7338 seq_printf(m, ":count=%ld\n", *count);
7339 else
7340 seq_puts(m, ":unlimited\n");
7341
7342 return 0;
7343 }
7344
7345 static int
7346 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7347 unsigned long ip, void *init_data, void **data)
7348 {
7349 struct ftrace_func_mapper *mapper = *data;
7350
7351 if (!mapper) {
7352 mapper = allocate_ftrace_func_mapper();
7353 if (!mapper)
7354 return -ENOMEM;
7355 *data = mapper;
7356 }
7357
7358 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7359 }
7360
7361 static void
7362 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7363 unsigned long ip, void *data)
7364 {
7365 struct ftrace_func_mapper *mapper = data;
7366
7367 if (!ip) {
7368 if (!mapper)
7369 return;
7370 free_ftrace_func_mapper(mapper, NULL);
7371 return;
7372 }
7373
7374 ftrace_func_mapper_remove_ip(mapper, ip);
7375 }
7376
7377 static struct ftrace_probe_ops snapshot_probe_ops = {
7378 .func = ftrace_snapshot,
7379 .print = ftrace_snapshot_print,
7380 };
7381
7382 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7383 .func = ftrace_count_snapshot,
7384 .print = ftrace_snapshot_print,
7385 .init = ftrace_snapshot_init,
7386 .free = ftrace_snapshot_free,
7387 };
7388
7389 static int
7390 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7391 char *glob, char *cmd, char *param, int enable)
7392 {
7393 struct ftrace_probe_ops *ops;
7394 void *count = (void *)-1;
7395 char *number;
7396 int ret;
7397
7398 if (!tr)
7399 return -ENODEV;
7400
7401 /* hash funcs only work with set_ftrace_filter */
7402 if (!enable)
7403 return -EINVAL;
7404
7405 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7406
7407 if (glob[0] == '!')
7408 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7409
7410 if (!param)
7411 goto out_reg;
7412
7413 number = strsep(&param, ":");
7414
7415 if (!strlen(number))
7416 goto out_reg;
7417
7418 /*
7419 * We use the callback data field (which is a pointer)
7420 * as our counter.
7421 */
7422 ret = kstrtoul(number, 0, (unsigned long *)&count);
7423 if (ret)
7424 return ret;
7425
7426 out_reg:
7427 ret = tracing_alloc_snapshot_instance(tr);
7428 if (ret < 0)
7429 goto out;
7430
7431 ret = register_ftrace_function_probe(glob, tr, ops, count);
7432
7433 out:
7434 return ret < 0 ? ret : 0;
7435 }
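
/*
 * Rough usage sketch of the "snapshot" command parsed above, written to
 * set_ftrace_filter (path assumes tracefs at /sys/kernel/tracing):
 *
 *	# echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *	# echo 'schedule:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *	# echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 * The optional count limits how many snapshots the probe may take, and
 * the '!' prefix removes the probe again.
 */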
7436
7437 static struct ftrace_func_command ftrace_snapshot_cmd = {
7438 .name = "snapshot",
7439 .func = ftrace_trace_snapshot_callback,
7440 };
7441
7442 static __init int register_snapshot_cmd(void)
7443 {
7444 return register_ftrace_command(&ftrace_snapshot_cmd);
7445 }
7446 #else
7447 static inline __init int register_snapshot_cmd(void) { return 0; }
7448 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7449
7450 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7451 {
7452 if (WARN_ON(!tr->dir))
7453 return ERR_PTR(-ENODEV);
7454
7455 /* Top directory uses NULL as the parent */
7456 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7457 return NULL;
7458
7459 /* All sub buffers have a descriptor */
7460 return tr->dir;
7461 }
7462
7463 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7464 {
7465 struct dentry *d_tracer;
7466
7467 if (tr->percpu_dir)
7468 return tr->percpu_dir;
7469
7470 d_tracer = tracing_get_dentry(tr);
7471 if (IS_ERR(d_tracer))
7472 return NULL;
7473
7474 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7475
7476 WARN_ONCE(!tr->percpu_dir,
7477 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7478
7479 return tr->percpu_dir;
7480 }
7481
7482 static struct dentry *
7483 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7484 void *data, long cpu, const struct file_operations *fops)
7485 {
7486 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7487
7488 if (ret) /* See tracing_get_cpu() */
7489 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7490 return ret;
7491 }
7492
7493 static void
7494 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7495 {
7496 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7497 struct dentry *d_cpu;
7498 char cpu_dir[30]; /* 30 characters should be more than enough */
7499
7500 if (!d_percpu)
7501 return;
7502
7503 snprintf(cpu_dir, 30, "cpu%ld", cpu);
7504 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7505 if (!d_cpu) {
7506 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7507 return;
7508 }
7509
7510 /* per cpu trace_pipe */
7511 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7512 tr, cpu, &tracing_pipe_fops);
7513
7514 /* per cpu trace */
7515 trace_create_cpu_file("trace", 0644, d_cpu,
7516 tr, cpu, &tracing_fops);
7517
7518 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7519 tr, cpu, &tracing_buffers_fops);
7520
7521 trace_create_cpu_file("stats", 0444, d_cpu,
7522 tr, cpu, &tracing_stats_fops);
7523
7524 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7525 tr, cpu, &tracing_entries_fops);
7526
7527 #ifdef CONFIG_TRACER_SNAPSHOT
7528 trace_create_cpu_file("snapshot", 0644, d_cpu,
7529 tr, cpu, &snapshot_fops);
7530
7531 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7532 tr, cpu, &snapshot_raw_fops);
7533 #endif
7534 }
7535
7536 #ifdef CONFIG_FTRACE_SELFTEST
7537 /* Let selftest have access to static functions in this file */
7538 #include "trace_selftest.c"
7539 #endif
7540
7541 static ssize_t
7542 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7543 loff_t *ppos)
7544 {
7545 struct trace_option_dentry *topt = filp->private_data;
7546 char *buf;
7547
7548 if (topt->flags->val & topt->opt->bit)
7549 buf = "1\n";
7550 else
7551 buf = "0\n";
7552
7553 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7554 }
7555
7556 static ssize_t
7557 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7558 loff_t *ppos)
7559 {
7560 struct trace_option_dentry *topt = filp->private_data;
7561 unsigned long val;
7562 int ret;
7563
7564 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7565 if (ret)
7566 return ret;
7567
7568 if (val != 0 && val != 1)
7569 return -EINVAL;
7570
7571 if (!!(topt->flags->val & topt->opt->bit) != val) {
7572 mutex_lock(&trace_types_lock);
7573 ret = __set_tracer_option(topt->tr, topt->flags,
7574 topt->opt, !val);
7575 mutex_unlock(&trace_types_lock);
7576 if (ret)
7577 return ret;
7578 }
7579
7580 *ppos += cnt;
7581
7582 return cnt;
7583 }
7584
7585
7586 static const struct file_operations trace_options_fops = {
7587 .open = tracing_open_generic,
7588 .read = trace_options_read,
7589 .write = trace_options_write,
7590 .llseek = generic_file_llseek,
7591 };
7592
7593 /*
7594 * In order to pass in both the trace_array descriptor as well as the index
7595 * to the flag that the trace option file represents, the trace_array
7596 * has a character array of trace_flags_index[], which holds the index
7597 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7598 * The address of this character array is passed to the flag option file
7599 * read/write callbacks.
7600 *
7601 * In order to extract both the index and the trace_array descriptor,
7602 * get_tr_index() uses the following algorithm.
7603 *
7604 * idx = *ptr;
7605 *
7606 * The pointer is the address of one entry in that index array, and
7607 * each entry holds its own position (index[1] == 1), so dereferencing
7608 * it yields the flag index.
7609 *
7610 * Subtracting that index from the pointer then gives the start of the array:
7611 *
7612 * ptr - idx == &index[0]
7613 *
7614 * Then a simple container_of() from that pointer gets us to the
7615 * trace_array descriptor.
7616 */
7617 static void get_tr_index(void *data, struct trace_array **ptr,
7618 unsigned int *pindex)
7619 {
7620 *pindex = *(unsigned char *)data;
7621
7622 *ptr = container_of(data - *pindex, struct trace_array,
7623 trace_flags_index);
7624 }
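
/*
 * Worked example of the scheme described above: if data points at
 * tr->trace_flags_index[3], then *pindex becomes 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address
 * recovers the enclosing struct trace_array.
 */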
7625
7626 static ssize_t
7627 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7628 loff_t *ppos)
7629 {
7630 void *tr_index = filp->private_data;
7631 struct trace_array *tr;
7632 unsigned int index;
7633 char *buf;
7634
7635 get_tr_index(tr_index, &tr, &index);
7636
7637 if (tr->trace_flags & (1 << index))
7638 buf = "1\n";
7639 else
7640 buf = "0\n";
7641
7642 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7643 }
7644
7645 static ssize_t
7646 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7647 loff_t *ppos)
7648 {
7649 void *tr_index = filp->private_data;
7650 struct trace_array *tr;
7651 unsigned int index;
7652 unsigned long val;
7653 int ret;
7654
7655 get_tr_index(tr_index, &tr, &index);
7656
7657 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7658 if (ret)
7659 return ret;
7660
7661 if (val != 0 && val != 1)
7662 return -EINVAL;
7663
7664 mutex_lock(&trace_types_lock);
7665 ret = set_tracer_flag(tr, 1 << index, val);
7666 mutex_unlock(&trace_types_lock);
7667
7668 if (ret < 0)
7669 return ret;
7670
7671 *ppos += cnt;
7672
7673 return cnt;
7674 }
7675
7676 static const struct file_operations trace_options_core_fops = {
7677 .open = tracing_open_generic,
7678 .read = trace_options_core_read,
7679 .write = trace_options_core_write,
7680 .llseek = generic_file_llseek,
7681 };
7682
7683 struct dentry *trace_create_file(const char *name,
7684 umode_t mode,
7685 struct dentry *parent,
7686 void *data,
7687 const struct file_operations *fops)
7688 {
7689 struct dentry *ret;
7690
7691 ret = tracefs_create_file(name, mode, parent, data, fops);
7692 if (!ret)
7693 pr_warn("Could not create tracefs '%s' entry\n", name);
7694
7695 return ret;
7696 }
7697
7698
7699 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7700 {
7701 struct dentry *d_tracer;
7702
7703 if (tr->options)
7704 return tr->options;
7705
7706 d_tracer = tracing_get_dentry(tr);
7707 if (IS_ERR(d_tracer))
7708 return NULL;
7709
7710 tr->options = tracefs_create_dir("options", d_tracer);
7711 if (!tr->options) {
7712 pr_warn("Could not create tracefs directory 'options'\n");
7713 return NULL;
7714 }
7715
7716 return tr->options;
7717 }
7718
7719 static void
7720 create_trace_option_file(struct trace_array *tr,
7721 struct trace_option_dentry *topt,
7722 struct tracer_flags *flags,
7723 struct tracer_opt *opt)
7724 {
7725 struct dentry *t_options;
7726
7727 t_options = trace_options_init_dentry(tr);
7728 if (!t_options)
7729 return;
7730
7731 topt->flags = flags;
7732 topt->opt = opt;
7733 topt->tr = tr;
7734
7735 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7736 &trace_options_fops);
7737
7738 }
7739
7740 static void
7741 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7742 {
7743 struct trace_option_dentry *topts;
7744 struct trace_options *tr_topts;
7745 struct tracer_flags *flags;
7746 struct tracer_opt *opts;
7747 int cnt;
7748 int i;
7749
7750 if (!tracer)
7751 return;
7752
7753 flags = tracer->flags;
7754
7755 if (!flags || !flags->opts)
7756 return;
7757
7758 /*
7759 * If this is an instance, only create flags for tracers
7760 * the instance may have.
7761 */
7762 if (!trace_ok_for_array(tracer, tr))
7763 return;
7764
7765 for (i = 0; i < tr->nr_topts; i++) {
7766 /* Make sure there are no duplicate flags. */
7767 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7768 return;
7769 }
7770
7771 opts = flags->opts;
7772
7773 for (cnt = 0; opts[cnt].name; cnt++)
7774 ;
7775
7776 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7777 if (!topts)
7778 return;
7779
7780 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7781 GFP_KERNEL);
7782 if (!tr_topts) {
7783 kfree(topts);
7784 return;
7785 }
7786
7787 tr->topts = tr_topts;
7788 tr->topts[tr->nr_topts].tracer = tracer;
7789 tr->topts[tr->nr_topts].topts = topts;
7790 tr->nr_topts++;
7791
7792 for (cnt = 0; opts[cnt].name; cnt++) {
7793 create_trace_option_file(tr, &topts[cnt], flags,
7794 &opts[cnt]);
7795 WARN_ONCE(topts[cnt].entry == NULL,
7796 "Failed to create trace option: %s",
7797 opts[cnt].name);
7798 }
7799 }
7800
7801 static struct dentry *
7802 create_trace_option_core_file(struct trace_array *tr,
7803 const char *option, long index)
7804 {
7805 struct dentry *t_options;
7806
7807 t_options = trace_options_init_dentry(tr);
7808 if (!t_options)
7809 return NULL;
7810
7811 return trace_create_file(option, 0644, t_options,
7812 (void *)&tr->trace_flags_index[index],
7813 &trace_options_core_fops);
7814 }
7815
7816 static void create_trace_options_dir(struct trace_array *tr)
7817 {
7818 struct dentry *t_options;
7819 bool top_level = tr == &global_trace;
7820 int i;
7821
7822 t_options = trace_options_init_dentry(tr);
7823 if (!t_options)
7824 return;
7825
7826 for (i = 0; trace_options[i]; i++) {
7827 if (top_level ||
7828 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7829 create_trace_option_core_file(tr, trace_options[i], i);
7830 }
7831 }
7832
7833 static ssize_t
7834 rb_simple_read(struct file *filp, char __user *ubuf,
7835 size_t cnt, loff_t *ppos)
7836 {
7837 struct trace_array *tr = filp->private_data;
7838 char buf[64];
7839 int r;
7840
7841 r = tracer_tracing_is_on(tr);
7842 r = sprintf(buf, "%d\n", r);
7843
7844 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7845 }
7846
7847 static ssize_t
7848 rb_simple_write(struct file *filp, const char __user *ubuf,
7849 size_t cnt, loff_t *ppos)
7850 {
7851 struct trace_array *tr = filp->private_data;
7852 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7853 unsigned long val;
7854 int ret;
7855
7856 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7857 if (ret)
7858 return ret;
7859
7860 if (buffer) {
7861 mutex_lock(&trace_types_lock);
7862 if (!!val == tracer_tracing_is_on(tr)) {
7863 val = 0; /* do nothing */
7864 } else if (val) {
7865 tracer_tracing_on(tr);
7866 if (tr->current_trace->start)
7867 tr->current_trace->start(tr);
7868 } else {
7869 tracer_tracing_off(tr);
7870 if (tr->current_trace->stop)
7871 tr->current_trace->stop(tr);
7872 }
7873 mutex_unlock(&trace_types_lock);
7874 }
7875
7876 (*ppos)++;
7877
7878 return cnt;
7879 }
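
/*
 * Rough usage sketch (path assumes tracefs at /sys/kernel/tracing):
 * tracing_on gates recording into the ring buffer without changing the
 * current tracer, so it is a cheap way to pause and resume tracing:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *
 * A write that matches the current state is ignored, as above.
 */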
7880
7881 static const struct file_operations rb_simple_fops = {
7882 .open = tracing_open_generic_tr,
7883 .read = rb_simple_read,
7884 .write = rb_simple_write,
7885 .release = tracing_release_generic_tr,
7886 .llseek = default_llseek,
7887 };
7888
7889 static ssize_t
7890 buffer_percent_read(struct file *filp, char __user *ubuf,
7891 size_t cnt, loff_t *ppos)
7892 {
7893 struct trace_array *tr = filp->private_data;
7894 char buf[64];
7895 int r;
7896
7897 r = tr->buffer_percent;
7898 r = sprintf(buf, "%d\n", r);
7899
7900 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7901 }
7902
7903 static ssize_t
7904 buffer_percent_write(struct file *filp, const char __user *ubuf,
7905 size_t cnt, loff_t *ppos)
7906 {
7907 struct trace_array *tr = filp->private_data;
7908 unsigned long val;
7909 int ret;
7910
7911 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7912 if (ret)
7913 return ret;
7914
7915 if (val > 100)
7916 return -EINVAL;
7917
7918 if (!val)
7919 val = 1;
7920
7921 tr->buffer_percent = val;
7922
7923 (*ppos)++;
7924
7925 return cnt;
7926 }
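
/*
 * Note on buffer_percent, as handled above: the value is the fill level
 * (in percent) a per-CPU buffer must reach before a blocked
 * trace_pipe_raw splice reader is woken; writes of 0 are stored as 1
 * and anything above 100 is rejected. The default of 50 is set in
 * init_tracer_tracefs() below.
 */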
7927
7928 static const struct file_operations buffer_percent_fops = {
7929 .open = tracing_open_generic_tr,
7930 .read = buffer_percent_read,
7931 .write = buffer_percent_write,
7932 .release = tracing_release_generic_tr,
7933 .llseek = default_llseek,
7934 };
7935
7936 struct dentry *trace_instance_dir;
7937
7938 static void
7939 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7940
7941 static int
7942 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7943 {
7944 enum ring_buffer_flags rb_flags;
7945
7946 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7947
7948 buf->tr = tr;
7949
7950 buf->buffer = ring_buffer_alloc(size, rb_flags);
7951 if (!buf->buffer)
7952 return -ENOMEM;
7953
7954 buf->data = alloc_percpu(struct trace_array_cpu);
7955 if (!buf->data) {
7956 ring_buffer_free(buf->buffer);
7957 buf->buffer = NULL;
7958 return -ENOMEM;
7959 }
7960
7961 /* Allocate the first page for all buffers */
7962 set_buffer_entries(&tr->trace_buffer,
7963 ring_buffer_size(tr->trace_buffer.buffer, 0));
7964
7965 return 0;
7966 }
7967
7968 static int allocate_trace_buffers(struct trace_array *tr, int size)
7969 {
7970 int ret;
7971
7972 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7973 if (ret)
7974 return ret;
7975
7976 #ifdef CONFIG_TRACER_MAX_TRACE
7977 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7978 allocate_snapshot ? size : 1);
7979 if (WARN_ON(ret)) {
7980 ring_buffer_free(tr->trace_buffer.buffer);
7981 tr->trace_buffer.buffer = NULL;
7982 free_percpu(tr->trace_buffer.data);
7983 tr->trace_buffer.data = NULL;
7984 return -ENOMEM;
7985 }
7986 tr->allocated_snapshot = allocate_snapshot;
7987
7988 /*
7989 * Only the top level trace array gets its snapshot allocated
7990 * from the kernel command line.
7991 */
7992 allocate_snapshot = false;
7993 #endif
7994 return 0;
7995 }
7996
7997 static void free_trace_buffer(struct trace_buffer *buf)
7998 {
7999 if (buf->buffer) {
8000 ring_buffer_free(buf->buffer);
8001 buf->buffer = NULL;
8002 free_percpu(buf->data);
8003 buf->data = NULL;
8004 }
8005 }
8006
8007 static void free_trace_buffers(struct trace_array *tr)
8008 {
8009 if (!tr)
8010 return;
8011
8012 free_trace_buffer(&tr->trace_buffer);
8013
8014 #ifdef CONFIG_TRACER_MAX_TRACE
8015 free_trace_buffer(&tr->max_buffer);
8016 #endif
8017 }
8018
8019 static void init_trace_flags_index(struct trace_array *tr)
8020 {
8021 int i;
8022
8023 /* Used by the trace options files */
8024 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8025 tr->trace_flags_index[i] = i;
8026 }
8027
8028 static void __update_tracer_options(struct trace_array *tr)
8029 {
8030 struct tracer *t;
8031
8032 for (t = trace_types; t; t = t->next)
8033 add_tracer_options(tr, t);
8034 }
8035
8036 static void update_tracer_options(struct trace_array *tr)
8037 {
8038 mutex_lock(&trace_types_lock);
8039 __update_tracer_options(tr);
8040 mutex_unlock(&trace_types_lock);
8041 }
8042
8043 static int instance_mkdir(const char *name)
8044 {
8045 struct trace_array *tr;
8046 int ret;
8047
8048 mutex_lock(&event_mutex);
8049 mutex_lock(&trace_types_lock);
8050
8051 ret = -EEXIST;
8052 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8053 if (tr->name && strcmp(tr->name, name) == 0)
8054 goto out_unlock;
8055 }
8056
8057 ret = -ENOMEM;
8058 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8059 if (!tr)
8060 goto out_unlock;
8061
8062 tr->name = kstrdup(name, GFP_KERNEL);
8063 if (!tr->name)
8064 goto out_free_tr;
8065
8066 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8067 goto out_free_tr;
8068
8069 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8070
8071 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8072
8073 raw_spin_lock_init(&tr->start_lock);
8074
8075 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8076
8077 tr->current_trace = &nop_trace;
8078
8079 INIT_LIST_HEAD(&tr->systems);
8080 INIT_LIST_HEAD(&tr->events);
8081 INIT_LIST_HEAD(&tr->hist_vars);
8082
8083 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8084 goto out_free_tr;
8085
8086 tr->dir = tracefs_create_dir(name, trace_instance_dir);
8087 if (!tr->dir)
8088 goto out_free_tr;
8089
8090 ret = event_trace_add_tracer(tr->dir, tr);
8091 if (ret) {
8092 tracefs_remove_recursive(tr->dir);
8093 goto out_free_tr;
8094 }
8095
8096 ftrace_init_trace_array(tr);
8097
8098 init_tracer_tracefs(tr, tr->dir);
8099 init_trace_flags_index(tr);
8100 __update_tracer_options(tr);
8101
8102 list_add(&tr->list, &ftrace_trace_arrays);
8103
8104 mutex_unlock(&trace_types_lock);
8105 mutex_unlock(&event_mutex);
8106
8107 return 0;
8108
8109 out_free_tr:
8110 free_trace_buffers(tr);
8111 free_cpumask_var(tr->tracing_cpumask);
8112 kfree(tr->name);
8113 kfree(tr);
8114
8115 out_unlock:
8116 mutex_unlock(&trace_types_lock);
8117 mutex_unlock(&event_mutex);
8118
8119 return ret;
8120
8121 }
8122
8123 static int instance_rmdir(const char *name)
8124 {
8125 struct trace_array *tr;
8126 int found = 0;
8127 int ret;
8128 int i;
8129
8130 mutex_lock(&event_mutex);
8131 mutex_lock(&trace_types_lock);
8132
8133 ret = -ENODEV;
8134 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8135 if (tr->name && strcmp(tr->name, name) == 0) {
8136 found = 1;
8137 break;
8138 }
8139 }
8140 if (!found)
8141 goto out_unlock;
8142
8143 ret = -EBUSY;
8144 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8145 goto out_unlock;
8146
8147 list_del(&tr->list);
8148
8149 /* Disable all the flags that were enabled coming in */
8150 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8151 if ((1 << i) & ZEROED_TRACE_FLAGS)
8152 set_tracer_flag(tr, 1 << i, 0);
8153 }
8154
8155 tracing_set_nop(tr);
8156 clear_ftrace_function_probes(tr);
8157 event_trace_del_tracer(tr);
8158 ftrace_clear_pids(tr);
8159 ftrace_destroy_function_files(tr);
8160 tracefs_remove_recursive(tr->dir);
8161 free_trace_buffers(tr);
8162
8163 for (i = 0; i < tr->nr_topts; i++) {
8164 kfree(tr->topts[i].topts);
8165 }
8166 kfree(tr->topts);
8167
8168 free_cpumask_var(tr->tracing_cpumask);
8169 kfree(tr->name);
8170 kfree(tr);
8171
8172 ret = 0;
8173
8174 out_unlock:
8175 mutex_unlock(&trace_types_lock);
8176 mutex_unlock(&event_mutex);
8177
8178 return ret;
8179 }
8180
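/*
 * Usage sketch for the two callbacks above (illustrative, assuming tracefs
 * is mounted at /sys/kernel/tracing; it may also be reached through the
 * debugfs automount at /sys/kernel/debug/tracing).  Creating or removing a
 * directory under "instances" lands in instance_mkdir()/instance_rmdir(),
 * giving the instance its own ring buffer and control files.  The name
 * "foo" is just an example:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *   rmdir /sys/kernel/tracing/instances/foo   # fails with EBUSY while in use
 */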
8181 static __init void create_trace_instances(struct dentry *d_tracer)
8182 {
8183 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8184 instance_mkdir,
8185 instance_rmdir);
8186 if (WARN_ON(!trace_instance_dir))
8187 return;
8188 }
8189
8190 static void
8191 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8192 {
8193 struct trace_event_file *file;
8194 int cpu;
8195
8196 trace_create_file("available_tracers", 0444, d_tracer,
8197 tr, &show_traces_fops);
8198
8199 trace_create_file("current_tracer", 0644, d_tracer,
8200 tr, &set_tracer_fops);
8201
8202 trace_create_file("tracing_cpumask", 0644, d_tracer,
8203 tr, &tracing_cpumask_fops);
8204
8205 trace_create_file("trace_options", 0644, d_tracer,
8206 tr, &tracing_iter_fops);
8207
8208 trace_create_file("trace", 0644, d_tracer,
8209 tr, &tracing_fops);
8210
8211 trace_create_file("trace_pipe", 0444, d_tracer,
8212 tr, &tracing_pipe_fops);
8213
8214 trace_create_file("buffer_size_kb", 0644, d_tracer,
8215 tr, &tracing_entries_fops);
8216
8217 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8218 tr, &tracing_total_entries_fops);
8219
8220 trace_create_file("free_buffer", 0200, d_tracer,
8221 tr, &tracing_free_buffer_fops);
8222
8223 trace_create_file("trace_marker", 0220, d_tracer,
8224 tr, &tracing_mark_fops);
8225
8226 file = __find_event_file(tr, "ftrace", "print");
8227 if (file && file->dir)
8228 trace_create_file("trigger", 0644, file->dir, file,
8229 &event_trigger_fops);
8230 tr->trace_marker_file = file;
8231
8232 trace_create_file("trace_marker_raw", 0220, d_tracer,
8233 tr, &tracing_mark_raw_fops);
8234
8235 trace_create_file("trace_clock", 0644, d_tracer, tr,
8236 &trace_clock_fops);
8237
8238 trace_create_file("tracing_on", 0644, d_tracer,
8239 tr, &rb_simple_fops);
8240
8241 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8242 &trace_time_stamp_mode_fops);
8243
8244 tr->buffer_percent = 50;
8245
8246 trace_create_file("buffer_percent", 0444, d_tracer,
8247 tr, &buffer_percent_fops);
8248
8249 create_trace_options_dir(tr);
8250
8251 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8252 trace_create_file("tracing_max_latency", 0644, d_tracer,
8253 &tr->max_latency, &tracing_max_lat_fops);
8254 #endif
8255
8256 if (ftrace_create_function_files(tr, d_tracer))
8257 WARN(1, "Could not allocate function filter files");
8258
8259 #ifdef CONFIG_TRACER_SNAPSHOT
8260 trace_create_file("snapshot", 0644, d_tracer,
8261 tr, &snapshot_fops);
8262 #endif
8263
8264 for_each_tracing_cpu(cpu)
8265 tracing_init_tracefs_percpu(tr, cpu);
8266
8267 ftrace_init_tracefs(tr, d_tracer);
8268 }
8269
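/*
 * The files created above form the per-instance control interface.  A
 * minimal sketch of how they are typically driven from user space (paths
 * assume tracefs mounted at /sys/kernel/tracing):
 *
 *   echo nop > current_tracer        # pick a tracer from available_tracers
 *   echo 1 > tracing_on              # let the ring buffer record
 *   echo "hello" > trace_marker      # inject a marker event
 *   cat trace                        # non-consuming view of the buffer
 *   cat trace_pipe                   # consuming, blocking read
 */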
8270 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8271 {
8272 struct vfsmount *mnt;
8273 struct file_system_type *type;
8274
8275 /*
8276 * To maintain backward compatibility for tools that mount
8277 * debugfs to get to the tracing facility, tracefs is automatically
8278 * mounted to the debugfs/tracing directory.
8279 */
8280 type = get_fs_type("tracefs");
8281 if (!type)
8282 return NULL;
8283 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8284 put_filesystem(type);
8285 if (IS_ERR(mnt))
8286 return NULL;
8287 mntget(mnt);
8288
8289 return mnt;
8290 }
8291
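/*
 * In practice this keeps the historical path working even though tracing
 * now lives on tracefs.  A sketch, assuming debugfs is mounted at its
 * conventional location:
 *
 *   mount -t debugfs nodev /sys/kernel/debug
 *   ls /sys/kernel/debug/tracing     # first access triggers the automount
 *
 * Newer tools can mount tracefs directly instead:
 *
 *   mount -t tracefs nodev /sys/kernel/tracing
 */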
8292 /**
8293 * tracing_init_dentry - initialize top level trace array
8294 *
8295 * This is called when creating files or directories in the tracing
8296 * directory. It is called by the boot up code (via fs_initcall()) and
8297 * returns the dentry of the top level tracing directory.
8298 */
8299 struct dentry *tracing_init_dentry(void)
8300 {
8301 struct trace_array *tr = &global_trace;
8302
8303 /* The top level trace array uses NULL as parent */
8304 if (tr->dir)
8305 return NULL;
8306
8307 if (WARN_ON(!tracefs_initialized()) ||
8308 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8309 WARN_ON(!debugfs_initialized())))
8310 return ERR_PTR(-ENODEV);
8311
8312 /*
8313 * As there may still be users that expect the tracing
8314 * files to exist in debugfs/tracing, we must automount
8315 * the tracefs file system there, so older tools still
8316 * work with the newer kernel.
8317 */
8318 tr->dir = debugfs_create_automount("tracing", NULL,
8319 trace_automount, NULL);
8320 if (!tr->dir) {
8321 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8322 return ERR_PTR(-ENOMEM);
8323 }
8324
8325 return NULL;
8326 }
8327
8328 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8329 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8330
8331 static void __init trace_eval_init(void)
8332 {
8333 int len;
8334
8335 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8336 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8337 }
8338
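/*
 * The eval maps collected here come from TRACE_DEFINE_ENUM() and
 * TRACE_DEFINE_SIZEOF() uses in trace event headers.  They let the
 * "print fmt" exposed in an event's format file show the real numeric
 * values instead of enum symbols that user space cannot resolve.
 * Illustrative only (EXAMPLE_STATE is a made-up name):
 *
 *   TRACE_DEFINE_ENUM(EXAMPLE_STATE);
 */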
8339 #ifdef CONFIG_MODULES
8340 static void trace_module_add_evals(struct module *mod)
8341 {
8342 if (!mod->num_trace_evals)
8343 return;
8344
8345 /*
8346 * Modules with bad taint do not have events created; do
8347 * not bother with their enums either.
8348 */
8349 if (trace_module_has_bad_taint(mod))
8350 return;
8351
8352 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8353 }
8354
8355 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8356 static void trace_module_remove_evals(struct module *mod)
8357 {
8358 union trace_eval_map_item *map;
8359 union trace_eval_map_item **last = &trace_eval_maps;
8360
8361 if (!mod->num_trace_evals)
8362 return;
8363
8364 mutex_lock(&trace_eval_mutex);
8365
8366 map = trace_eval_maps;
8367
8368 while (map) {
8369 if (map->head.mod == mod)
8370 break;
8371 map = trace_eval_jmp_to_tail(map);
8372 last = &map->tail.next;
8373 map = map->tail.next;
8374 }
8375 if (!map)
8376 goto out;
8377
8378 *last = trace_eval_jmp_to_tail(map)->tail.next;
8379 kfree(map);
8380 out:
8381 mutex_unlock(&trace_eval_mutex);
8382 }
8383 #else
8384 static inline void trace_module_remove_evals(struct module *mod) { }
8385 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8386
8387 static int trace_module_notify(struct notifier_block *self,
8388 unsigned long val, void *data)
8389 {
8390 struct module *mod = data;
8391
8392 switch (val) {
8393 case MODULE_STATE_COMING:
8394 trace_module_add_evals(mod);
8395 break;
8396 case MODULE_STATE_GOING:
8397 trace_module_remove_evals(mod);
8398 break;
8399 }
8400
8401 return 0;
8402 }
8403
8404 static struct notifier_block trace_module_nb = {
8405 .notifier_call = trace_module_notify,
8406 .priority = 0,
8407 };
8408 #endif /* CONFIG_MODULES */
8409
8410 static __init int tracer_init_tracefs(void)
8411 {
8412 struct dentry *d_tracer;
8413
8414 trace_access_lock_init();
8415
8416 d_tracer = tracing_init_dentry();
8417 if (IS_ERR(d_tracer))
8418 return 0;
8419
8420 event_trace_init();
8421
8422 init_tracer_tracefs(&global_trace, d_tracer);
8423 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8424
8425 trace_create_file("tracing_thresh", 0644, d_tracer,
8426 &global_trace, &tracing_thresh_fops);
8427
8428 trace_create_file("README", 0444, d_tracer,
8429 NULL, &tracing_readme_fops);
8430
8431 trace_create_file("saved_cmdlines", 0444, d_tracer,
8432 NULL, &tracing_saved_cmdlines_fops);
8433
8434 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8435 NULL, &tracing_saved_cmdlines_size_fops);
8436
8437 trace_create_file("saved_tgids", 0444, d_tracer,
8438 NULL, &tracing_saved_tgids_fops);
8439
8440 trace_eval_init();
8441
8442 trace_create_eval_file(d_tracer);
8443
8444 #ifdef CONFIG_MODULES
8445 register_module_notifier(&trace_module_nb);
8446 #endif
8447
8448 #ifdef CONFIG_DYNAMIC_FTRACE
8449 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8450 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8451 #endif
8452
8453 create_trace_instances(d_tracer);
8454
8455 update_tracer_options(&global_trace);
8456
8457 return 0;
8458 }
8459
8460 static int trace_panic_handler(struct notifier_block *this,
8461 unsigned long event, void *unused)
8462 {
8463 if (ftrace_dump_on_oops)
8464 ftrace_dump(ftrace_dump_on_oops);
8465 return NOTIFY_OK;
8466 }
8467
8468 static struct notifier_block trace_panic_notifier = {
8469 .notifier_call = trace_panic_handler,
8470 .next = NULL,
8471 .priority = 150 /* priority: INT_MAX >= x >= 0 */
8472 };
8473
8474 static int trace_die_handler(struct notifier_block *self,
8475 unsigned long val,
8476 void *data)
8477 {
8478 switch (val) {
8479 case DIE_OOPS:
8480 if (ftrace_dump_on_oops)
8481 ftrace_dump(ftrace_dump_on_oops);
8482 break;
8483 default:
8484 break;
8485 }
8486 return NOTIFY_OK;
8487 }
8488
8489 static struct notifier_block trace_die_notifier = {
8490 .notifier_call = trace_die_handler,
8491 .priority = 200
8492 };
8493
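/*
 * These notifiers only act when ftrace_dump_on_oops is set.  A sketch of
 * how that is normally enabled:
 *
 *   - boot with "ftrace_dump_on_oops" (or "ftrace_dump_on_oops=orig_cpu"
 *     to dump only the CPU that oopsed) on the kernel command line, or
 *   - echo 1 > /proc/sys/kernel/ftrace_dump_on_oops at run time.
 */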
8494 /*
8495 * printk is limited to a max of 1024 characters; we really don't need it that big.
8496 * Nothing should be printing 1000 characters anyway.
8497 */
8498 #define TRACE_MAX_PRINT 1000
8499
8500 /*
8501 * Define here KERN_TRACE so that we have one place to modify
8502 * it if we decide to change what log level the ftrace dump
8503 * should be at.
8504 */
8505 #define KERN_TRACE KERN_EMERG
8506
8507 void
8508 trace_printk_seq(struct trace_seq *s)
8509 {
8510 /* Probably should print a warning here. */
8511 if (s->seq.len >= TRACE_MAX_PRINT)
8512 s->seq.len = TRACE_MAX_PRINT;
8513
8514 /*
8515 * More paranoid code. Although the buffer size is set to
8516 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8517 * an extra layer of protection.
8518 */
8519 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8520 s->seq.len = s->seq.size - 1;
8521
8522 /* should be zero-terminated, but we are paranoid. */
8523 s->buffer[s->seq.len] = 0;
8524
8525 printk(KERN_TRACE "%s", s->buffer);
8526
8527 trace_seq_init(s);
8528 }
8529
8530 void trace_init_global_iter(struct trace_iterator *iter)
8531 {
8532 iter->tr = &global_trace;
8533 iter->trace = iter->tr->current_trace;
8534 iter->cpu_file = RING_BUFFER_ALL_CPUS;
8535 iter->trace_buffer = &global_trace.trace_buffer;
8536
8537 if (iter->trace && iter->trace->open)
8538 iter->trace->open(iter);
8539
8540 /* Annotate start of buffers if we had overruns */
8541 if (ring_buffer_overruns(iter->trace_buffer->buffer))
8542 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8543
8544 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8545 if (trace_clocks[iter->tr->clock_id].in_ns)
8546 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8547 }
8548
8549 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8550 {
8551 /* use static because iter can be a bit big for the stack */
8552 static struct trace_iterator iter;
8553 static atomic_t dump_running;
8554 struct trace_array *tr = &global_trace;
8555 unsigned int old_userobj;
8556 unsigned long flags;
8557 int cnt = 0, cpu;
8558
8559 /* Only allow one dump user at a time. */
8560 if (atomic_inc_return(&dump_running) != 1) {
8561 atomic_dec(&dump_running);
8562 return;
8563 }
8564
8565 /*
8566 * Always turn off tracing when we dump.
8567 * We don't need to show trace output of what happens
8568 * between multiple crashes.
8569 *
8570 * If the user does a sysrq-z, then they can re-enable
8571 * tracing with echo 1 > tracing_on.
8572 */
8573 tracing_off();
8574
8575 local_irq_save(flags);
8576 printk_nmi_direct_enter();
8577
8578 /* Simulate the iterator */
8579 trace_init_global_iter(&iter);
8580
8581 for_each_tracing_cpu(cpu) {
8582 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8583 }
8584
8585 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8586
8587 /* don't look at user memory in panic mode */
8588 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8589
8590 switch (oops_dump_mode) {
8591 case DUMP_ALL:
8592 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8593 break;
8594 case DUMP_ORIG:
8595 iter.cpu_file = raw_smp_processor_id();
8596 break;
8597 case DUMP_NONE:
8598 goto out_enable;
8599 default:
8600 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8601 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8602 }
8603
8604 printk(KERN_TRACE "Dumping ftrace buffer:\n");
8605
8606 /* Did function tracer already get disabled? */
8607 if (ftrace_is_dead()) {
8608 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8609 printk("# MAY BE MISSING FUNCTION EVENTS\n");
8610 }
8611
8612 /*
8613 * We need to stop all tracing on all CPUs to read
8614 * the next buffer. This is a bit expensive, but is
8615 * not done often. We fill in all that we can read,
8616 * and then release the locks again.
8617 */
8618
8619 while (!trace_empty(&iter)) {
8620
8621 if (!cnt)
8622 printk(KERN_TRACE "---------------------------------\n");
8623
8624 cnt++;
8625
8626 /* reset all but tr, trace, and overruns */
8627 memset(&iter.seq, 0,
8628 sizeof(struct trace_iterator) -
8629 offsetof(struct trace_iterator, seq));
8630 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8631 iter.pos = -1;
8632
8633 if (trace_find_next_entry_inc(&iter) != NULL) {
8634 int ret;
8635
8636 ret = print_trace_line(&iter);
8637 if (ret != TRACE_TYPE_NO_CONSUME)
8638 trace_consume(&iter);
8639 }
8640 touch_nmi_watchdog();
8641
8642 trace_printk_seq(&iter.seq);
8643 }
8644
8645 if (!cnt)
8646 printk(KERN_TRACE " (ftrace buffer empty)\n");
8647 else
8648 printk(KERN_TRACE "---------------------------------\n");
8649
8650 out_enable:
8651 tr->trace_flags |= old_userobj;
8652
8653 for_each_tracing_cpu(cpu) {
8654 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8655 }
8656 atomic_dec(&dump_running);
8657 printk_nmi_direct_exit();
8658 local_irq_restore(flags);
8659 }
8660 EXPORT_SYMBOL_GPL(ftrace_dump);
8661
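/*
 * Besides the panic/die paths, ftrace_dump() can be triggered by hand via
 * sysrq, which is handy for inspecting the dump format.  A sketch (tracefs
 * path assumed to be /sys/kernel/tracing):
 *
 *   echo z > /proc/sysrq-trigger                 # dump the ftrace buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on      # re-enable tracing afterwards
 */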
8662 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8663 {
8664 char **argv;
8665 int argc, ret;
8666
8667 argc = 0;
8668 ret = 0;
8669 argv = argv_split(GFP_KERNEL, buf, &argc);
8670 if (!argv)
8671 return -ENOMEM;
8672
8673 if (argc)
8674 ret = createfn(argc, argv);
8675
8676 argv_free(argv);
8677
8678 return ret;
8679 }
8680
8681 #define WRITE_BUFSIZE 4096
8682
8683 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8684 size_t count, loff_t *ppos,
8685 int (*createfn)(int, char **))
8686 {
8687 char *kbuf, *buf, *tmp;
8688 int ret = 0;
8689 size_t done = 0;
8690 size_t size;
8691
8692 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8693 if (!kbuf)
8694 return -ENOMEM;
8695
8696 while (done < count) {
8697 size = count - done;
8698
8699 if (size >= WRITE_BUFSIZE)
8700 size = WRITE_BUFSIZE - 1;
8701
8702 if (copy_from_user(kbuf, buffer + done, size)) {
8703 ret = -EFAULT;
8704 goto out;
8705 }
8706 kbuf[size] = '\0';
8707 buf = kbuf;
8708 do {
8709 tmp = strchr(buf, '\n');
8710 if (tmp) {
8711 *tmp = '\0';
8712 size = tmp - buf + 1;
8713 } else {
8714 size = strlen(buf);
8715 if (done + size < count) {
8716 if (buf != kbuf)
8717 break;
8718 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8719 pr_warn("Line length is too long: Should be less than %d\n",
8720 WRITE_BUFSIZE - 2);
8721 ret = -EINVAL;
8722 goto out;
8723 }
8724 }
8725 done += size;
8726
8727 /* Remove comments */
8728 tmp = strchr(buf, '#');
8729
8730 if (tmp)
8731 *tmp = '\0';
8732
8733 ret = trace_run_command(buf, createfn);
8734 if (ret)
8735 goto out;
8736 buf += size;
8737
8738 } while (done < count);
8739 }
8740 ret = done;
8741
8742 out:
8743 kfree(kbuf);
8744
8745 return ret;
8746 }
8747
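/*
 * trace_parse_run_command()/trace_run_command() back the write() path of
 * the dynamic event files (kprobe_events, uprobe_events, and friends):
 * each newline-separated line is split into words and handed to the
 * caller's createfn, with '#' starting a comment.  A sketch, where
 * "myopen" is just an example probe name:
 *
 *   echo 'p:myopen do_sys_open' > /sys/kernel/tracing/kprobe_events
 *   echo '-:myopen'             > /sys/kernel/tracing/kprobe_events
 */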
8748 __init static int tracer_alloc_buffers(void)
8749 {
8750 int ring_buf_size;
8751 int ret = -ENOMEM;
8752
8753 /*
8754 * Make sure we don't accidentally add more trace options
8755 * than we have bits for.
8756 */
8757 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8758
8759 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8760 goto out;
8761
8762 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8763 goto out_free_buffer_mask;
8764
8765 /* Only allocate trace_printk buffers if a trace_printk exists */
8766 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8767 /* Must be called before global_trace.buffer is allocated */
8768 trace_printk_init_buffers();
8769
8770 /* To save memory, keep the ring buffer size to its minimum */
8771 if (ring_buffer_expanded)
8772 ring_buf_size = trace_buf_size;
8773 else
8774 ring_buf_size = 1;
8775
8776 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8777 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8778
8779 raw_spin_lock_init(&global_trace.start_lock);
8780
8781 /*
8782 * The prepare callbacks allocate some memory for the ring buffer. We
8783 * don't free the buffer if the CPU goes down. If we were to free
8784 * the buffer, then the user would lose any trace that was in the
8785 * buffer. The memory will be removed once the "instance" is removed.
8786 */
8787 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8788 "trace/RB:preapre", trace_rb_cpu_prepare,
8789 NULL);
8790 if (ret < 0)
8791 goto out_free_cpumask;
8792 /* Used for event triggers */
8793 ret = -ENOMEM;
8794 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8795 if (!temp_buffer)
8796 goto out_rm_hp_state;
8797
8798 if (trace_create_savedcmd() < 0)
8799 goto out_free_temp_buffer;
8800
8801 /* TODO: make the number of buffers hot pluggable with CPUs */
8802 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8803 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8804 WARN_ON(1);
8805 goto out_free_savedcmd;
8806 }
8807
8808 if (global_trace.buffer_disabled)
8809 tracing_off();
8810
8811 if (trace_boot_clock) {
8812 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8813 if (ret < 0)
8814 pr_warn("Trace clock %s not defined, going back to default\n",
8815 trace_boot_clock);
8816 }
8817
8818 /*
8819 * register_tracer() might reference current_trace, so it
8820 * needs to be set before we register anything. This is
8821 * just a bootstrap of current_trace anyway.
8822 */
8823 global_trace.current_trace = &nop_trace;
8824
8825 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8826
8827 ftrace_init_global_array_ops(&global_trace);
8828
8829 init_trace_flags_index(&global_trace);
8830
8831 register_tracer(&nop_trace);
8832
8833 /* Function tracing may start here (via kernel command line) */
8834 init_function_trace();
8835
8836 /* All seems OK, enable tracing */
8837 tracing_disabled = 0;
8838
8839 atomic_notifier_chain_register(&panic_notifier_list,
8840 &trace_panic_notifier);
8841
8842 register_die_notifier(&trace_die_notifier);
8843
8844 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8845
8846 INIT_LIST_HEAD(&global_trace.systems);
8847 INIT_LIST_HEAD(&global_trace.events);
8848 INIT_LIST_HEAD(&global_trace.hist_vars);
8849 list_add(&global_trace.list, &ftrace_trace_arrays);
8850
8851 apply_trace_boot_options();
8852
8853 register_snapshot_cmd();
8854
8855 return 0;
8856
8857 out_free_savedcmd:
8858 free_saved_cmdlines_buffer(savedcmd);
8859 out_free_temp_buffer:
8860 ring_buffer_free(temp_buffer);
8861 out_rm_hp_state:
8862 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8863 out_free_cpumask:
8864 free_cpumask_var(global_trace.tracing_cpumask);
8865 out_free_buffer_mask:
8866 free_cpumask_var(tracing_buffer_mask);
8867 out:
8868 return ret;
8869 }
8870
8871 void __init early_trace_init(void)
8872 {
8873 if (tracepoint_printk) {
8874 tracepoint_print_iter =
8875 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8876 if (WARN_ON(!tracepoint_print_iter))
8877 tracepoint_printk = 0;
8878 else
8879 static_key_enable(&tracepoint_printk_key.key);
8880 }
8881 tracer_alloc_buffers();
8882 }
8883
8884 void __init trace_init(void)
8885 {
8886 trace_event_init();
8887 }
8888
8889 __init static int clear_boot_tracer(void)
8890 {
8891 /*
8892 * The default bootup tracer name points into an init section buffer.
8893 * This function is called at late_initcall time. If the boot tracer
8894 * was not found by then, clear the pointer out, to prevent
8895 * later registration from accessing the buffer that is
8896 * about to be freed.
8897 */
8898 if (!default_bootup_tracer)
8899 return 0;
8900
8901 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8902 default_bootup_tracer);
8903 default_bootup_tracer = NULL;
8904
8905 return 0;
8906 }
8907
8908 fs_initcall(tracer_init_tracefs);
8909 late_initcall_sync(clear_boot_tracer);
8910
8911 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8912 __init static int tracing_set_default_clock(void)
8913 {
8914 /* sched_clock_stable() is determined in late_initcall */
8915 if (!trace_boot_clock && !sched_clock_stable()) {
8916 printk(KERN_WARNING
8917 "Unstable clock detected, switching default tracing clock to \"global\"\n"
8918 "If you want to keep using the local clock, then add:\n"
8919 " \"trace_clock=local\"\n"
8920 "on the kernel command line\n");
8921 tracing_set_clock(&global_trace, "global");
8922 }
8923
8924 return 0;
8925 }
8926 late_initcall_sync(tracing_set_default_clock);
8927 #endif
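/*
 * The clock chosen above is only the default; it can still be overridden
 * either at boot or at run time.  A sketch (tracefs path assumed to be
 * /sys/kernel/tracing):
 *
 *   trace_clock=local                                # kernel command line
 *   echo global > /sys/kernel/tracing/trace_clock    # run time
 */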