1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57 * On boot up, the ring buffer is set to the minimum size, so that
58 * we do not waste memory on systems that are not using tracing.
59 */
60 bool ring_buffer_expanded;
61
62 /*
63 * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the entries
65  * inserted during the selftest, although concurrent insertions into
66  * the ring-buffer, such as trace_printk(), could have occurred at
67  * the same time, giving false positive or negative results.
68 */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 if (!tracing_selftest_disabled) {
81 tracing_selftest_disabled = true;
82 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 return 0;
102 }
103
104 /*
105 * To prevent the comm cache from being overwritten when no
106 * tracing is active, only save the comm when a trace event
107 * occurred.
108 */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112 * Kill all tracing for good (never come back).
113  * It is initialized to 1 and is set to zero if the initialization
114  * of the tracer is successful; that is the only place that sets
115  * it back to zero.
116 */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly tracing_buffer_mask;
120
121 /*
122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123 *
124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125 * is set, then ftrace_dump is called. This will output the contents
126 * of the ftrace buffers to the console. This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135 */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 struct module *mod;
146 unsigned long length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152 /*
153 * "end" is first and points to NULL as it must be different
154 * than "mod" or "eval_string"
155 */
156 union trace_eval_map_item *next;
157 const char *end; /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163 * The trace_eval_maps are saved in an array with two extra elements,
164 * one at the beginning, and one at the end. The beginning item contains
165 * the count of the saved maps (head.length), and the module they
166 * belong to if not built in (head.mod). The ending item contains a
167 * pointer to the next array of saved eval_map items.
168 */
169 union trace_eval_map_item {
170 struct trace_eval_map map;
171 struct trace_eval_map_head head;
172 struct trace_eval_map_tail tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 struct trace_buffer *buffer,
181 unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE 100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 default_bootup_tracer = bootup_tracer_buf;
193 /* We are using ftrace early, expand it */
194 ring_buffer_expanded = true;
195 return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 ftrace_dump_on_oops = DUMP_ALL;
203 return 1;
204 }
205
206 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 ftrace_dump_on_oops = DUMP_ORIG;
208 return 1;
209 }
210
211 return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 __disable_trace_on_warning = 1;
219 return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 allocate_snapshot = true;
226 /* We also need the main ring buffer expanded */
227 ring_buffer_expanded = true;
228 return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 return 1;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 trace_boot_clock = trace_boot_clock_buf;
249 return 1;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255 /* Ignore the "tp_printk_stop_on_boot" param */
256 if (*str == '_')
257 return 0;
258
259 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260 tracepoint_printk = 1;
261 return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264
265 static int __init set_tracepoint_printk_stop(char *str)
266 {
267 tracepoint_printk_stop_on_boot = true;
268 return 1;
269 }
270 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
271
272 unsigned long long ns2usecs(u64 nsec)
273 {
274 nsec += 500;
275 do_div(nsec, 1000);
276 return nsec;
277 }
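/*
 * Example: ns2usecs() rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */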
278
279 static void
280 trace_process_export(struct trace_export *export,
281 struct ring_buffer_event *event, int flag)
282 {
283 struct trace_entry *entry;
284 unsigned int size = 0;
285
286 if (export->flags & flag) {
287 entry = ring_buffer_event_data(event);
288 size = ring_buffer_event_length(event);
289 export->write(export, entry, size);
290 }
291 }
292
293 static DEFINE_MUTEX(ftrace_export_lock);
294
295 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
296
297 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
298 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
299 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
300
301 static inline void ftrace_exports_enable(struct trace_export *export)
302 {
303 if (export->flags & TRACE_EXPORT_FUNCTION)
304 static_branch_inc(&trace_function_exports_enabled);
305
306 if (export->flags & TRACE_EXPORT_EVENT)
307 static_branch_inc(&trace_event_exports_enabled);
308
309 if (export->flags & TRACE_EXPORT_MARKER)
310 static_branch_inc(&trace_marker_exports_enabled);
311 }
312
313 static inline void ftrace_exports_disable(struct trace_export *export)
314 {
315 if (export->flags & TRACE_EXPORT_FUNCTION)
316 static_branch_dec(&trace_function_exports_enabled);
317
318 if (export->flags & TRACE_EXPORT_EVENT)
319 static_branch_dec(&trace_event_exports_enabled);
320
321 if (export->flags & TRACE_EXPORT_MARKER)
322 static_branch_dec(&trace_marker_exports_enabled);
323 }
324
325 static void ftrace_exports(struct ring_buffer_event *event, int flag)
326 {
327 struct trace_export *export;
328
329 preempt_disable_notrace();
330
331 export = rcu_dereference_raw_check(ftrace_exports_list);
332 while (export) {
333 trace_process_export(export, event, flag);
334 export = rcu_dereference_raw_check(export->next);
335 }
336
337 preempt_enable_notrace();
338 }
339
340 static inline void
341 add_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 rcu_assign_pointer(export->next, *list);
344 /*
345  * We are adding the export to the list, but another
346  * CPU might be walking that list. We need to make sure
347  * the export->next pointer is valid before another CPU sees
348  * the export pointer included in the list.
349 */
350 rcu_assign_pointer(*list, export);
351 }
352
353 static inline int
354 rm_trace_export(struct trace_export **list, struct trace_export *export)
355 {
356 struct trace_export **p;
357
358 for (p = list; *p != NULL; p = &(*p)->next)
359 if (*p == export)
360 break;
361
362 if (*p != export)
363 return -1;
364
365 rcu_assign_pointer(*p, (*p)->next);
366
367 return 0;
368 }
369
370 static inline void
371 add_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373 ftrace_exports_enable(export);
374
375 add_trace_export(list, export);
376 }
377
378 static inline int
379 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
380 {
381 int ret;
382
383 ret = rm_trace_export(list, export);
384 ftrace_exports_disable(export);
385
386 return ret;
387 }
388
389 int register_ftrace_export(struct trace_export *export)
390 {
391 if (WARN_ON_ONCE(!export->write))
392 return -1;
393
394 mutex_lock(&ftrace_export_lock);
395
396 add_ftrace_export(&ftrace_exports_list, export);
397
398 mutex_unlock(&ftrace_export_lock);
399
400 return 0;
401 }
402 EXPORT_SYMBOL_GPL(register_ftrace_export);
403
404 int unregister_ftrace_export(struct trace_export *export)
405 {
406 int ret;
407
408 mutex_lock(&ftrace_export_lock);
409
410 ret = rm_ftrace_export(&ftrace_exports_list, export);
411
412 mutex_unlock(&ftrace_export_lock);
413
414 return ret;
415 }
416 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
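/*
 * Minimal usage sketch for the trace_export interface above. The names
 * below are hypothetical and the write() prototype is assumed to match
 * struct trace_export in <linux/trace.h>:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u byte trace entry\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */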
417
418 /* trace_flags holds trace_options default values */
419 #define TRACE_DEFAULT_FLAGS \
420 (FUNCTION_DEFAULT_FLAGS | \
421 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
422 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
423 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
424 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
425 TRACE_ITER_HASH_PTR)
426
427 /* trace_options that are only supported by global_trace */
428 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
429 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
430
431 /* trace_flags that are default zero for instances */
432 #define ZEROED_TRACE_FLAGS \
433 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
434
435 /*
436 * The global_trace is the descriptor that holds the top-level tracing
437 * buffers for the live tracing.
438 */
439 static struct trace_array global_trace = {
440 .trace_flags = TRACE_DEFAULT_FLAGS,
441 };
442
443 LIST_HEAD(ftrace_trace_arrays);
444
445 int trace_array_get(struct trace_array *this_tr)
446 {
447 struct trace_array *tr;
448 int ret = -ENODEV;
449
450 mutex_lock(&trace_types_lock);
451 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
452 if (tr == this_tr) {
453 tr->ref++;
454 ret = 0;
455 break;
456 }
457 }
458 mutex_unlock(&trace_types_lock);
459
460 return ret;
461 }
462
463 static void __trace_array_put(struct trace_array *this_tr)
464 {
465 WARN_ON(!this_tr->ref);
466 this_tr->ref--;
467 }
468
469 /**
470 * trace_array_put - Decrement the reference counter for this trace array.
471 * @this_tr : pointer to the trace array
472 *
473 * NOTE: Use this when we no longer need the trace array returned by
474 * trace_array_get_by_name(). This ensures the trace array can be later
475 * destroyed.
476 *
477 */
478 void trace_array_put(struct trace_array *this_tr)
479 {
480 if (!this_tr)
481 return;
482
483 mutex_lock(&trace_types_lock);
484 __trace_array_put(this_tr);
485 mutex_unlock(&trace_types_lock);
486 }
487 EXPORT_SYMBOL_GPL(trace_array_put);
488
489 int tracing_check_open_get_tr(struct trace_array *tr)
490 {
491 int ret;
492
493 ret = security_locked_down(LOCKDOWN_TRACEFS);
494 if (ret)
495 return ret;
496
497 if (tracing_disabled)
498 return -ENODEV;
499
500 if (tr && trace_array_get(tr) < 0)
501 return -ENODEV;
502
503 return 0;
504 }
505
506 int call_filter_check_discard(struct trace_event_call *call, void *rec,
507 struct trace_buffer *buffer,
508 struct ring_buffer_event *event)
509 {
510 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
511 !filter_match_preds(call->filter, rec)) {
512 __trace_event_discard_commit(buffer, event);
513 return 1;
514 }
515
516 return 0;
517 }
518
519 void trace_free_pid_list(struct trace_pid_list *pid_list)
520 {
521 vfree(pid_list->pids);
522 kfree(pid_list);
523 }
524
525 /**
526 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
527 * @filtered_pids: The list of pids to check
528 * @search_pid: The PID to find in @filtered_pids
529 *
530 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
531 */
532 bool
533 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
534 {
535 /*
536 * If pid_max changed after filtered_pids was created, we
537 * by default ignore all pids greater than the previous pid_max.
538 */
539 if (search_pid >= filtered_pids->pid_max)
540 return false;
541
542 return test_bit(search_pid, filtered_pids->pids);
543 }
544
545 /**
546 * trace_ignore_this_task - should a task be ignored for tracing
547 * @filtered_pids: The list of pids to check
548 * @filtered_no_pids: The list of pids not to be traced
549 * @task: The task that should be ignored if not filtered
550 *
551 * Checks if @task should be traced or not from @filtered_pids.
552 * Returns true if @task should *NOT* be traced.
553 * Returns false if @task should be traced.
554 */
555 bool
556 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
557 struct trace_pid_list *filtered_no_pids,
558 struct task_struct *task)
559 {
560 /*
561 * If filtered_no_pids is not empty, and the task's pid is listed
562 * in filtered_no_pids, then return true.
563 * Otherwise, if filtered_pids is empty, that means we can
564 * trace all tasks. If it has content, then only trace pids
565 * within filtered_pids.
566 */
567
568 return (filtered_pids &&
569 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
570 (filtered_no_pids &&
571 trace_find_filtered_pid(filtered_no_pids, task->pid));
572 }
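/*
 * Typical use (illustrative): a tracing hook can bail out early for
 * filtered tasks:
 *
 *	if (trace_ignore_this_task(pid_list, no_pid_list, current))
 *		return;
 */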
573
574 /**
575 * trace_filter_add_remove_task - Add or remove a task from a pid_list
576 * @pid_list: The list to modify
577 * @self: The current task for fork or NULL for exit
578 * @task: The task to add or remove
579 *
580 * If adding a task, if @self is defined, the task is only added if @self
581 * is also included in @pid_list. This happens on fork and tasks should
582 * only be added when the parent is listed. If @self is NULL, then the
583 * @task pid will be removed from the list, which would happen on exit
584 * of a task.
585 */
586 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
587 struct task_struct *self,
588 struct task_struct *task)
589 {
590 if (!pid_list)
591 return;
592
593 /* For forks, we only add if the forking task is listed */
594 if (self) {
595 if (!trace_find_filtered_pid(pid_list, self->pid))
596 return;
597 }
598
599 /* Sorry, but we don't support pid_max changing after setting */
600 if (task->pid >= pid_list->pid_max)
601 return;
602
603 /* "self" is set for forks, and NULL for exits */
604 if (self)
605 set_bit(task->pid, pid_list->pids);
606 else
607 clear_bit(task->pid, pid_list->pids);
608 }
609
610 /**
611 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
612 * @pid_list: The pid list to show
613 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
614 * @pos: The position of the file
615 *
616 * This is used by the seq_file "next" operation to iterate the pids
617 * listed in a trace_pid_list structure.
618 *
619 * Returns the pid+1 as we want to display pid of zero, but NULL would
620 * stop the iteration.
621 */
622 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
623 {
624 unsigned long pid = (unsigned long)v;
625
626 (*pos)++;
627
628 /* pid already is +1 of the actual previous bit */
629 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
630
631 /* Return pid + 1 to allow zero to be represented */
632 if (pid < pid_list->pid_max)
633 return (void *)(pid + 1);
634
635 return NULL;
636 }
637
638 /**
639 * trace_pid_start - Used for seq_file to start reading pid lists
640 * @pid_list: The pid list to show
641 * @pos: The position of the file
642 *
643 * This is used by seq_file "start" operation to start the iteration
644 * of listing pids.
645 *
646 * Returns the pid+1 as we want to display pid of zero, but NULL would
647 * stop the iteration.
648 */
649 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
650 {
651 unsigned long pid;
652 loff_t l = 0;
653
654 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
655 if (pid >= pid_list->pid_max)
656 return NULL;
657
658 /* Return pid + 1 so that zero can be the exit value */
659 for (pid++; pid && l < *pos;
660 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
661 ;
662 return (void *)pid;
663 }
664
665 /**
666 * trace_pid_show - show the current pid in seq_file processing
667 * @m: The seq_file structure to write into
668 * @v: A void pointer of the pid (+1) value to display
669 *
670 * Can be directly used by seq_file operations to display the current
671 * pid value.
672 */
673 int trace_pid_show(struct seq_file *m, void *v)
674 {
675 unsigned long pid = (unsigned long)v - 1;
676
677 seq_printf(m, "%lu\n", pid);
678 return 0;
679 }
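/*
 * Illustrative sketch of wiring the three helpers above into a seq_file.
 * The names are hypothetical and the pid_list lookup (and a p_stop()
 * callback) are left out:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_sops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */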
680
681 /* 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
682 #define PID_BUF_SIZE 127
683
684 int trace_pid_write(struct trace_pid_list *filtered_pids,
685 struct trace_pid_list **new_pid_list,
686 const char __user *ubuf, size_t cnt)
687 {
688 struct trace_pid_list *pid_list;
689 struct trace_parser parser;
690 unsigned long val;
691 int nr_pids = 0;
692 ssize_t read = 0;
693 ssize_t ret = 0;
694 loff_t pos;
695 pid_t pid;
696
697 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
698 return -ENOMEM;
699
700 /*
701  * The write is an all or nothing operation: always create a new
702  * array when the user adds new pids, rather than modifying the
703  * current one in place. If the operation fails, then the current
704  * list is not modified.
705 */
706 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
707 if (!pid_list) {
708 trace_parser_put(&parser);
709 return -ENOMEM;
710 }
711
712 pid_list->pid_max = READ_ONCE(pid_max);
713
714 /* Only truncating will shrink pid_max */
715 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
716 pid_list->pid_max = filtered_pids->pid_max;
717
718 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
719 if (!pid_list->pids) {
720 trace_parser_put(&parser);
721 kfree(pid_list);
722 return -ENOMEM;
723 }
724
725 if (filtered_pids) {
726 /* copy the current bits to the new max */
727 for_each_set_bit(pid, filtered_pids->pids,
728 filtered_pids->pid_max) {
729 set_bit(pid, pid_list->pids);
730 nr_pids++;
731 }
732 }
733
734 while (cnt > 0) {
735
736 pos = 0;
737
738 ret = trace_get_user(&parser, ubuf, cnt, &pos);
739 if (ret < 0 || !trace_parser_loaded(&parser))
740 break;
741
742 read += ret;
743 ubuf += ret;
744 cnt -= ret;
745
746 ret = -EINVAL;
747 if (kstrtoul(parser.buffer, 0, &val))
748 break;
749 if (val >= pid_list->pid_max)
750 break;
751
752 pid = (pid_t)val;
753
754 set_bit(pid, pid_list->pids);
755 nr_pids++;
756
757 trace_parser_clear(&parser);
758 ret = 0;
759 }
760 trace_parser_put(&parser);
761
762 if (ret < 0) {
763 trace_free_pid_list(pid_list);
764 return ret;
765 }
766
767 if (!nr_pids) {
768 /* Cleared the list of pids */
769 trace_free_pid_list(pid_list);
770 read = ret;
771 pid_list = NULL;
772 }
773
774 *new_pid_list = pid_list;
775
776 return read;
777 }
778
779 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
780 {
781 u64 ts;
782
783 /* Early boot up does not have a buffer yet */
784 if (!buf->buffer)
785 return trace_clock_local();
786
787 ts = ring_buffer_time_stamp(buf->buffer);
788 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
789
790 return ts;
791 }
792
793 u64 ftrace_now(int cpu)
794 {
795 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
796 }
797
798 /**
799 * tracing_is_enabled - Show if global_trace has been enabled
800 *
801 * Shows if the global trace has been enabled or not. It uses the
802 * mirror flag "buffer_disabled" to be used in fast paths such as for
803 * the irqsoff tracer. But it may be inaccurate due to races. If you
804 * need to know the accurate state, use tracing_is_on() which is a little
805 * slower, but accurate.
806 */
807 int tracing_is_enabled(void)
808 {
809 /*
810 * For quick access (irqsoff uses this in fast path), just
811 * return the mirror variable of the state of the ring buffer.
812 * It's a little racy, but we don't really care.
813 */
814 smp_rmb();
815 return !global_trace.buffer_disabled;
816 }
817
818 /*
819 * trace_buf_size is the size in bytes that is allocated
820 * for a buffer. Note, the number of bytes is always rounded
821 * to page size.
822 *
823  * This number is purposely set to a low value of 16384.
824  * If a dump on oops happens, it is much appreciated not to have
825  * to wait for all that output. Anyway, this is configurable at
826  * both boot time and run time.
827 */
828 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
829
830 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
831
832 /* trace_types holds a link list of available tracers. */
833 static struct tracer *trace_types __read_mostly;
834
835 /*
836 * trace_types_lock is used to protect the trace_types list.
837 */
838 DEFINE_MUTEX(trace_types_lock);
839
840 /*
841  * Serialize access to the ring buffer.
842  *
843  * The ring buffer serializes readers, but that is only low-level
844  * protection. The validity of the events (returned by ring_buffer_peek()
845  * etc.) is not protected by the ring buffer.
846  *
847  * The content of events may become garbage if we allow other processes
848  * to consume these events concurrently:
849  * A) the page of the consumed events may become a normal page
850  * (not a reader page) in the ring buffer, and this page will be
851  * rewritten by the events producer.
852  * B) the page of the consumed events may become a page for splice_read,
853  * and this page will be returned to the system.
854  *
855  * These primitives allow multiple processes to access different CPU
856  * ring buffers concurrently.
857  *
858  * These primitives don't distinguish read-only and read-consume access.
859  * Multiple read-only accesses are also serialized.
860 */
861
862 #ifdef CONFIG_SMP
863 static DECLARE_RWSEM(all_cpu_access_lock);
864 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
865
866 static inline void trace_access_lock(int cpu)
867 {
868 if (cpu == RING_BUFFER_ALL_CPUS) {
869 /* gain it for accessing the whole ring buffer. */
870 down_write(&all_cpu_access_lock);
871 } else {
872 /* gain it for accessing a cpu ring buffer. */
873
874 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
875 down_read(&all_cpu_access_lock);
876
877 /* Secondly block other access to this @cpu ring buffer. */
878 mutex_lock(&per_cpu(cpu_access_lock, cpu));
879 }
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884 if (cpu == RING_BUFFER_ALL_CPUS) {
885 up_write(&all_cpu_access_lock);
886 } else {
887 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
888 up_read(&all_cpu_access_lock);
889 }
890 }
891
892 static inline void trace_access_lock_init(void)
893 {
894 int cpu;
895
896 for_each_possible_cpu(cpu)
897 mutex_init(&per_cpu(cpu_access_lock, cpu));
898 }
899
900 #else
901
902 static DEFINE_MUTEX(access_lock);
903
904 static inline void trace_access_lock(int cpu)
905 {
906 (void)cpu;
907 mutex_lock(&access_lock);
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912 (void)cpu;
913 mutex_unlock(&access_lock);
914 }
915
916 static inline void trace_access_lock_init(void)
917 {
918 }
919
920 #endif
921
922 #ifdef CONFIG_STACKTRACE
923 static void __ftrace_trace_stack(struct trace_buffer *buffer,
924 unsigned int trace_ctx,
925 int skip, struct pt_regs *regs);
926 static inline void ftrace_trace_stack(struct trace_array *tr,
927 struct trace_buffer *buffer,
928 unsigned int trace_ctx,
929 int skip, struct pt_regs *regs);
930
931 #else
932 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
933 unsigned int trace_ctx,
934 int skip, struct pt_regs *regs)
935 {
936 }
937 static inline void ftrace_trace_stack(struct trace_array *tr,
938 struct trace_buffer *buffer,
939 unsigned long trace_ctx,
940 int skip, struct pt_regs *regs)
941 {
942 }
943
944 #endif
945
946 static __always_inline void
947 trace_event_setup(struct ring_buffer_event *event,
948 int type, unsigned int trace_ctx)
949 {
950 struct trace_entry *ent = ring_buffer_event_data(event);
951
952 tracing_generic_entry_update(ent, type, trace_ctx);
953 }
954
955 static __always_inline struct ring_buffer_event *
956 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
957 int type,
958 unsigned long len,
959 unsigned int trace_ctx)
960 {
961 struct ring_buffer_event *event;
962
963 event = ring_buffer_lock_reserve(buffer, len);
964 if (event != NULL)
965 trace_event_setup(event, type, trace_ctx);
966
967 return event;
968 }
969
970 void tracer_tracing_on(struct trace_array *tr)
971 {
972 if (tr->array_buffer.buffer)
973 ring_buffer_record_on(tr->array_buffer.buffer);
974 /*
975 * This flag is looked at when buffers haven't been allocated
976 * yet, or by some tracers (like irqsoff), that just want to
977 * know if the ring buffer has been disabled, but it can handle
978 * races of where it gets disabled but we still do a record.
979 * As the check is in the fast path of the tracers, it is more
980 * important to be fast than accurate.
981 */
982 tr->buffer_disabled = 0;
983 /* Make the flag seen by readers */
984 smp_wmb();
985 }
986
987 /**
988 * tracing_on - enable tracing buffers
989 *
990 * This function enables tracing buffers that may have been
991 * disabled with tracing_off.
992 */
993 void tracing_on(void)
994 {
995 tracer_tracing_on(&global_trace);
996 }
997 EXPORT_SYMBOL_GPL(tracing_on);
998
999
1000 static __always_inline void
1001 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1002 {
1003 __this_cpu_write(trace_taskinfo_save, true);
1004
1005 /* If this is the temp buffer, we need to commit fully */
1006 if (this_cpu_read(trace_buffered_event) == event) {
1007 /* Length is in event->array[0] */
1008 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1009 /* Release the temp buffer */
1010 this_cpu_dec(trace_buffered_event_cnt);
1011 } else
1012 ring_buffer_unlock_commit(buffer, event);
1013 }
1014
1015 /**
1016 * __trace_puts - write a constant string into the trace buffer.
1017 * @ip: The address of the caller
1018 * @str: The constant string to write
1019 * @size: The size of the string.
1020 */
1021 int __trace_puts(unsigned long ip, const char *str, int size)
1022 {
1023 struct ring_buffer_event *event;
1024 struct trace_buffer *buffer;
1025 struct print_entry *entry;
1026 unsigned int trace_ctx;
1027 int alloc;
1028
1029 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1030 return 0;
1031
1032 if (unlikely(tracing_selftest_running || tracing_disabled))
1033 return 0;
1034
1035 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1036
1037 trace_ctx = tracing_gen_ctx();
1038 buffer = global_trace.array_buffer.buffer;
1039 ring_buffer_nest_start(buffer);
1040 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1041 trace_ctx);
1042 if (!event) {
1043 size = 0;
1044 goto out;
1045 }
1046
1047 entry = ring_buffer_event_data(event);
1048 entry->ip = ip;
1049
1050 memcpy(&entry->buf, str, size);
1051
1052 /* Add a newline if necessary */
1053 if (entry->buf[size - 1] != '\n') {
1054 entry->buf[size] = '\n';
1055 entry->buf[size + 1] = '\0';
1056 } else
1057 entry->buf[size] = '\0';
1058
1059 __buffer_unlock_commit(buffer, event);
1060 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1061 out:
1062 ring_buffer_nest_end(buffer);
1063 return size;
1064 }
1065 EXPORT_SYMBOL_GPL(__trace_puts);
1066
1067 /**
1068 * __trace_bputs - write the pointer to a constant string into trace buffer
1069 * @ip: The address of the caller
1070 * @str: The constant string to write to the buffer to
1071 */
1072 int __trace_bputs(unsigned long ip, const char *str)
1073 {
1074 struct ring_buffer_event *event;
1075 struct trace_buffer *buffer;
1076 struct bputs_entry *entry;
1077 unsigned int trace_ctx;
1078 int size = sizeof(struct bputs_entry);
1079 int ret = 0;
1080
1081 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1082 return 0;
1083
1084 if (unlikely(tracing_selftest_running || tracing_disabled))
1085 return 0;
1086
1087 trace_ctx = tracing_gen_ctx();
1088 buffer = global_trace.array_buffer.buffer;
1089
1090 ring_buffer_nest_start(buffer);
1091 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1092 trace_ctx);
1093 if (!event)
1094 goto out;
1095
1096 entry = ring_buffer_event_data(event);
1097 entry->ip = ip;
1098 entry->str = str;
1099
1100 __buffer_unlock_commit(buffer, event);
1101 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1102
1103 ret = 1;
1104 out:
1105 ring_buffer_nest_end(buffer);
1106 return ret;
1107 }
1108 EXPORT_SYMBOL_GPL(__trace_bputs);
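/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly;
 * the trace_puts() macro (declared next to trace_printk()) picks one of
 * them depending on whether the string is a build-time constant:
 *
 *	trace_puts("reached the slow path\n");
 */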
1109
1110 #ifdef CONFIG_TRACER_SNAPSHOT
1111 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1112 void *cond_data)
1113 {
1114 struct tracer *tracer = tr->current_trace;
1115 unsigned long flags;
1116
1117 if (in_nmi()) {
1118 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1119 internal_trace_puts("*** snapshot is being ignored ***\n");
1120 return;
1121 }
1122
1123 if (!tr->allocated_snapshot) {
1124 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1125 internal_trace_puts("*** stopping trace here! ***\n");
1126 tracing_off();
1127 return;
1128 }
1129
1130 /* Note, snapshot can not be used when the tracer uses it */
1131 if (tracer->use_max_tr) {
1132 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1133 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1134 return;
1135 }
1136
1137 local_irq_save(flags);
1138 update_max_tr(tr, current, smp_processor_id(), cond_data);
1139 local_irq_restore(flags);
1140 }
1141
1142 void tracing_snapshot_instance(struct trace_array *tr)
1143 {
1144 tracing_snapshot_instance_cond(tr, NULL);
1145 }
1146
1147 /**
1148 * tracing_snapshot - take a snapshot of the current buffer.
1149 *
1150 * This causes a swap between the snapshot buffer and the current live
1151 * tracing buffer. You can use this to take snapshots of the live
1152 * trace when some condition is triggered, but continue to trace.
1153 *
1154 * Note, make sure to allocate the snapshot with either
1155 * a tracing_snapshot_alloc(), or by doing it manually
1156 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1157 *
1158 * If the snapshot buffer is not allocated, it will stop tracing.
1159 * Basically making a permanent snapshot.
1160 */
1161 void tracing_snapshot(void)
1162 {
1163 struct trace_array *tr = &global_trace;
1164
1165 tracing_snapshot_instance(tr);
1166 }
1167 EXPORT_SYMBOL_GPL(tracing_snapshot);
1168
1169 /**
1170 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1171 * @tr: The tracing instance to snapshot
1172 * @cond_data: The data to be tested conditionally, and possibly saved
1173 *
1174 * This is the same as tracing_snapshot() except that the snapshot is
1175 * conditional - the snapshot will only happen if the
1176 * cond_snapshot.update() implementation receiving the cond_data
1177 * returns true, which means that the trace array's cond_snapshot
1178 * update() operation used the cond_data to determine whether the
1179 * snapshot should be taken, and if it was, presumably saved it along
1180 * with the snapshot.
1181 */
1182 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1183 {
1184 tracing_snapshot_instance_cond(tr, cond_data);
1185 }
1186 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1187
1188 /**
1189 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1190 * @tr: The tracing instance
1191 *
1192 * When the user enables a conditional snapshot using
1193 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1194 * with the snapshot. This accessor is used to retrieve it.
1195 *
1196 * Should not be called from cond_snapshot.update(), since it takes
1197 * the tr->max_lock lock, which the code calling
1198 * cond_snapshot.update() has already done.
1199 *
1200 * Returns the cond_data associated with the trace array's snapshot.
1201 */
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204 void *cond_data = NULL;
1205
1206 arch_spin_lock(&tr->max_lock);
1207
1208 if (tr->cond_snapshot)
1209 cond_data = tr->cond_snapshot->cond_data;
1210
1211 arch_spin_unlock(&tr->max_lock);
1212
1213 return cond_data;
1214 }
1215 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1216
1217 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1218 struct array_buffer *size_buf, int cpu_id);
1219 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1220
1221 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1222 {
1223 int ret;
1224
1225 if (!tr->allocated_snapshot) {
1226
1227 /* allocate spare buffer */
1228 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1229 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1230 if (ret < 0)
1231 return ret;
1232
1233 tr->allocated_snapshot = true;
1234 }
1235
1236 return 0;
1237 }
1238
1239 static void free_snapshot(struct trace_array *tr)
1240 {
1241 /*
1242  * We don't free the ring buffer; instead, we resize it because
1243  * the max_tr ring buffer has some state (e.g. ring->clock) and
1244  * we want to preserve it.
1245 */
1246 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1247 set_buffer_entries(&tr->max_buffer, 1);
1248 tracing_reset_online_cpus(&tr->max_buffer);
1249 tr->allocated_snapshot = false;
1250 }
1251
1252 /**
1253 * tracing_alloc_snapshot - allocate snapshot buffer.
1254 *
1255 * This only allocates the snapshot buffer if it isn't already
1256 * allocated - it doesn't also take a snapshot.
1257 *
1258 * This is meant to be used in cases where the snapshot buffer needs
1259 * to be set up for events that can't sleep but need to be able to
1260 * trigger a snapshot.
1261 */
1262 int tracing_alloc_snapshot(void)
1263 {
1264 struct trace_array *tr = &global_trace;
1265 int ret;
1266
1267 ret = tracing_alloc_snapshot_instance(tr);
1268 WARN_ON(ret < 0);
1269
1270 return ret;
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1273
1274 /**
1275 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1276 *
1277 * This is similar to tracing_snapshot(), but it will allocate the
1278 * snapshot buffer if it isn't already allocated. Use this only
1279 * where it is safe to sleep, as the allocation may sleep.
1280 *
1281 * This causes a swap between the snapshot buffer and the current live
1282 * tracing buffer. You can use this to take snapshots of the live
1283 * trace when some condition is triggered, but continue to trace.
1284 */
1285 void tracing_snapshot_alloc(void)
1286 {
1287 int ret;
1288
1289 ret = tracing_alloc_snapshot();
1290 if (ret < 0)
1291 return;
1292
1293 tracing_snapshot();
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1296
1297 /**
1298 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1299 * @tr: The tracing instance
1300 * @cond_data: User data to associate with the snapshot
1301 * @update: Implementation of the cond_snapshot update function
1302 *
1303 * Check whether the conditional snapshot for the given instance has
1304 * already been enabled, or if the current tracer is already using a
1305 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1306 * save the cond_data and update function inside.
1307 *
1308 * Returns 0 if successful, error otherwise.
1309 */
1310 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1311 cond_update_fn_t update)
1312 {
1313 struct cond_snapshot *cond_snapshot;
1314 int ret = 0;
1315
1316 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1317 if (!cond_snapshot)
1318 return -ENOMEM;
1319
1320 cond_snapshot->cond_data = cond_data;
1321 cond_snapshot->update = update;
1322
1323 mutex_lock(&trace_types_lock);
1324
1325 ret = tracing_alloc_snapshot_instance(tr);
1326 if (ret)
1327 goto fail_unlock;
1328
1329 if (tr->current_trace->use_max_tr) {
1330 ret = -EBUSY;
1331 goto fail_unlock;
1332 }
1333
1334 /*
1335 * The cond_snapshot can only change to NULL without the
1336 * trace_types_lock. We don't care if we race with it going
1337 * to NULL, but we want to make sure that it's not set to
1338 * something other than NULL when we get here, which we can
1339 * do safely with only holding the trace_types_lock and not
1340 * having to take the max_lock.
1341 */
1342 if (tr->cond_snapshot) {
1343 ret = -EBUSY;
1344 goto fail_unlock;
1345 }
1346
1347 arch_spin_lock(&tr->max_lock);
1348 tr->cond_snapshot = cond_snapshot;
1349 arch_spin_unlock(&tr->max_lock);
1350
1351 mutex_unlock(&trace_types_lock);
1352
1353 return ret;
1354
1355 fail_unlock:
1356 mutex_unlock(&trace_types_lock);
1357 kfree(cond_snapshot);
1358 return ret;
1359 }
1360 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
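/*
 * Minimal usage sketch for conditional snapshots. The names below are
 * hypothetical and the callback is assumed to follow the cond_update_fn_t
 * prototype, bool (*)(struct trace_array *tr, void *cond_data):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s && s->threshold_crossed;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *
 * Afterwards, tracing_snapshot_cond(tr, &my_state) takes a snapshot only
 * when my_update() returns true, and tracing_snapshot_cond_disable(tr)
 * tears the condition down again.
 */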
1361
1362 /**
1363 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1364 * @tr: The tracing instance
1365 *
1366 * Check whether the conditional snapshot for the given instance is
1367 * enabled; if so, free the cond_snapshot associated with it,
1368 * otherwise return -EINVAL.
1369 *
1370 * Returns 0 if successful, error otherwise.
1371 */
1372 int tracing_snapshot_cond_disable(struct trace_array *tr)
1373 {
1374 int ret = 0;
1375
1376 arch_spin_lock(&tr->max_lock);
1377
1378 if (!tr->cond_snapshot)
1379 ret = -EINVAL;
1380 else {
1381 kfree(tr->cond_snapshot);
1382 tr->cond_snapshot = NULL;
1383 }
1384
1385 arch_spin_unlock(&tr->max_lock);
1386
1387 return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1390 #else
1391 void tracing_snapshot(void)
1392 {
1393 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot);
1396 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1397 {
1398 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1401 int tracing_alloc_snapshot(void)
1402 {
1403 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1404 return -ENODEV;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1407 void tracing_snapshot_alloc(void)
1408 {
1409 /* Give warning */
1410 tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413 void *tracing_cond_snapshot_data(struct trace_array *tr)
1414 {
1415 return NULL;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1418 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1419 {
1420 return -ENODEV;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1423 int tracing_snapshot_cond_disable(struct trace_array *tr)
1424 {
1425 return false;
1426 }
1427 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1428 #endif /* CONFIG_TRACER_SNAPSHOT */
1429
1430 void tracer_tracing_off(struct trace_array *tr)
1431 {
1432 if (tr->array_buffer.buffer)
1433 ring_buffer_record_off(tr->array_buffer.buffer);
1434 /*
1435 * This flag is looked at when buffers haven't been allocated
1436 * yet, or by some tracers (like irqsoff), that just want to
1437 * know if the ring buffer has been disabled, but it can handle
1438 * races of where it gets disabled but we still do a record.
1439 * As the check is in the fast path of the tracers, it is more
1440 * important to be fast than accurate.
1441 */
1442 tr->buffer_disabled = 1;
1443 /* Make the flag seen by readers */
1444 smp_wmb();
1445 }
1446
1447 /**
1448 * tracing_off - turn off tracing buffers
1449 *
1450 * This function stops the tracing buffers from recording data.
1451 * It does not disable any overhead the tracers themselves may
1452 * be causing. This function simply causes all recording to
1453 * the ring buffers to fail.
1454 */
1455 void tracing_off(void)
1456 {
1457 tracer_tracing_off(&global_trace);
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_off);
1460
1461 void disable_trace_on_warning(void)
1462 {
1463 if (__disable_trace_on_warning) {
1464 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1465 "Disabling tracing due to warning\n");
1466 tracing_off();
1467 }
1468 }
1469
1470 /**
1471 * tracer_tracing_is_on - show real state of ring buffer enabled
1472 * @tr : the trace array to know if ring buffer is enabled
1473 *
1474 * Shows real state of the ring buffer if it is enabled or not.
1475 */
1476 bool tracer_tracing_is_on(struct trace_array *tr)
1477 {
1478 if (tr->array_buffer.buffer)
1479 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1480 return !tr->buffer_disabled;
1481 }
1482
1483 /**
1484 * tracing_is_on - show state of ring buffers enabled
1485 */
1486 int tracing_is_on(void)
1487 {
1488 return tracer_tracing_is_on(&global_trace);
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_is_on);
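/*
 * Example (illustrative): kernel code can bracket a region of interest so
 * that the ring buffers stop recording right after it, preserving the
 * events of interest until they are read back:
 *
 *	tracing_on();
 *	do_something_interesting();
 *	tracing_off();
 */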
1491
1492 static int __init set_buf_size(char *str)
1493 {
1494 unsigned long buf_size;
1495
1496 if (!str)
1497 return 0;
1498 buf_size = memparse(str, &str);
1499 /*
1500 * nr_entries can not be zero and the startup
1501 * tests require some buffer space. Therefore
1502 * ensure we have at least 4096 bytes of buffer.
1503 */
1504 trace_buf_size = max(4096UL, buf_size);
1505 return 1;
1506 }
1507 __setup("trace_buf_size=", set_buf_size);
1508
1509 static int __init set_tracing_thresh(char *str)
1510 {
1511 unsigned long threshold;
1512 int ret;
1513
1514 if (!str)
1515 return 0;
1516 ret = kstrtoul(str, 0, &threshold);
1517 if (ret < 0)
1518 return 0;
1519 tracing_thresh = threshold * 1000;
1520 return 1;
1521 }
1522 __setup("tracing_thresh=", set_tracing_thresh);
1523
1524 unsigned long nsecs_to_usecs(unsigned long nsecs)
1525 {
1526 return nsecs / 1000;
1527 }
1528
1529 /*
1530 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1531 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1532 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1533 * of strings in the order that the evals (enum) were defined.
1534 */
1535 #undef C
1536 #define C(a, b) b
1537
1538 /* These must match the bit positions in trace_iterator_flags */
1539 static const char *trace_options[] = {
1540 TRACE_FLAGS
1541 NULL
1542 };
1543
1544 static struct {
1545 u64 (*func)(void);
1546 const char *name;
1547 int in_ns; /* is this clock in nanoseconds? */
1548 } trace_clocks[] = {
1549 { trace_clock_local, "local", 1 },
1550 { trace_clock_global, "global", 1 },
1551 { trace_clock_counter, "counter", 0 },
1552 { trace_clock_jiffies, "uptime", 0 },
1553 { trace_clock, "perf", 1 },
1554 { ktime_get_mono_fast_ns, "mono", 1 },
1555 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1556 { ktime_get_boot_fast_ns, "boot", 1 },
1557 ARCH_TRACE_CLOCKS
1558 };
1559
1560 bool trace_clock_in_ns(struct trace_array *tr)
1561 {
1562 if (trace_clocks[tr->clock_id].in_ns)
1563 return true;
1564
1565 return false;
1566 }
1567
1568 /*
1569 * trace_parser_get_init - gets the buffer for trace parser
1570 */
1571 int trace_parser_get_init(struct trace_parser *parser, int size)
1572 {
1573 memset(parser, 0, sizeof(*parser));
1574
1575 parser->buffer = kmalloc(size, GFP_KERNEL);
1576 if (!parser->buffer)
1577 return 1;
1578
1579 parser->size = size;
1580 return 0;
1581 }
1582
1583 /*
1584 * trace_parser_put - frees the buffer for trace parser
1585 */
1586 void trace_parser_put(struct trace_parser *parser)
1587 {
1588 kfree(parser->buffer);
1589 parser->buffer = NULL;
1590 }
1591
1592 /*
1593 * trace_get_user - reads the user input string separated by space
1594 * (matched by isspace(ch))
1595 *
1596 * For each string found the 'struct trace_parser' is updated,
1597 * and the function returns.
1598 *
1599 * Returns number of bytes read.
1600 *
1601 * See kernel/trace/trace.h for 'struct trace_parser' details.
1602 */
1603 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1604 size_t cnt, loff_t *ppos)
1605 {
1606 char ch;
1607 size_t read = 0;
1608 ssize_t ret;
1609
1610 if (!*ppos)
1611 trace_parser_clear(parser);
1612
1613 ret = get_user(ch, ubuf++);
1614 if (ret)
1615 goto out;
1616
1617 read++;
1618 cnt--;
1619
1620 /*
1621 * The parser is not finished with the last write,
1622 * continue reading the user input without skipping spaces.
1623 */
1624 if (!parser->cont) {
1625 /* skip white space */
1626 while (cnt && isspace(ch)) {
1627 ret = get_user(ch, ubuf++);
1628 if (ret)
1629 goto out;
1630 read++;
1631 cnt--;
1632 }
1633
1634 parser->idx = 0;
1635
1636 /* only spaces were written */
1637 if (isspace(ch) || !ch) {
1638 *ppos += read;
1639 ret = read;
1640 goto out;
1641 }
1642 }
1643
1644 /* read the non-space input */
1645 while (cnt && !isspace(ch) && ch) {
1646 if (parser->idx < parser->size - 1)
1647 parser->buffer[parser->idx++] = ch;
1648 else {
1649 ret = -EINVAL;
1650 goto out;
1651 }
1652 ret = get_user(ch, ubuf++);
1653 if (ret)
1654 goto out;
1655 read++;
1656 cnt--;
1657 }
1658
1659 /* We either got finished input or we have to wait for another call. */
1660 if (isspace(ch) || !ch) {
1661 parser->buffer[parser->idx] = 0;
1662 parser->cont = false;
1663 } else if (parser->idx < parser->size - 1) {
1664 parser->cont = true;
1665 parser->buffer[parser->idx++] = ch;
1666 /* Make sure the parsed string always terminates with '\0'. */
1667 parser->buffer[parser->idx] = 0;
1668 } else {
1669 ret = -EINVAL;
1670 goto out;
1671 }
1672
1673 *ppos += read;
1674 ret = read;
1675
1676 out:
1677 return ret;
1678 }
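/*
 * Sketch of the typical calling pattern (see trace_pid_write() above for
 * a complete, working example); SIZE and the buffer handling step are
 * placeholders:
 *
 *	if (trace_parser_get_init(&parser, SIZE))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		read += ret;
 *		ubuf += ret;
 *		cnt -= ret;
 *		(act on parser.buffer)
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */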
1679
1680 /* TODO add a seq_buf_to_buffer() */
1681 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1682 {
1683 int len;
1684
1685 if (trace_seq_used(s) <= s->seq.readpos)
1686 return -EBUSY;
1687
1688 len = trace_seq_used(s) - s->seq.readpos;
1689 if (cnt > len)
1690 cnt = len;
1691 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1692
1693 s->seq.readpos += cnt;
1694 return cnt;
1695 }
1696
1697 unsigned long __read_mostly tracing_thresh;
1698 static const struct file_operations tracing_max_lat_fops;
1699
1700 #ifdef LATENCY_FS_NOTIFY
1701
1702 static struct workqueue_struct *fsnotify_wq;
1703
1704 static void latency_fsnotify_workfn(struct work_struct *work)
1705 {
1706 struct trace_array *tr = container_of(work, struct trace_array,
1707 fsnotify_work);
1708 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1709 }
1710
1711 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1712 {
1713 struct trace_array *tr = container_of(iwork, struct trace_array,
1714 fsnotify_irqwork);
1715 queue_work(fsnotify_wq, &tr->fsnotify_work);
1716 }
1717
1718 static void trace_create_maxlat_file(struct trace_array *tr,
1719 struct dentry *d_tracer)
1720 {
1721 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1722 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1723 tr->d_max_latency = trace_create_file("tracing_max_latency",
1724 TRACE_MODE_WRITE,
1725 d_tracer, &tr->max_latency,
1726 &tracing_max_lat_fops);
1727 }
1728
1729 __init static int latency_fsnotify_init(void)
1730 {
1731 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1732 WQ_UNBOUND | WQ_HIGHPRI, 0);
1733 if (!fsnotify_wq) {
1734 pr_err("Unable to allocate tr_max_lat_wq\n");
1735 return -ENOMEM;
1736 }
1737 return 0;
1738 }
1739
1740 late_initcall_sync(latency_fsnotify_init);
1741
1742 void latency_fsnotify(struct trace_array *tr)
1743 {
1744 if (!fsnotify_wq)
1745 return;
1746 /*
1747 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1748 * possible that we are called from __schedule() or do_idle(), which
1749 * could cause a deadlock.
1750 */
1751 irq_work_queue(&tr->fsnotify_irqwork);
1752 }
1753
1754 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
1755 || defined(CONFIG_OSNOISE_TRACER)
1756
1757 #define trace_create_maxlat_file(tr, d_tracer) \
1758 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1759 d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1760
1761 #else
1762 #define trace_create_maxlat_file(tr, d_tracer) do { } while (0)
1763 #endif
1764
1765 #ifdef CONFIG_TRACER_MAX_TRACE
1766 /*
1767 * Copy the new maximum trace into the separate maximum-trace
1768 * structure. (this way the maximum trace is permanently saved,
1769 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1770 */
1771 static void
1772 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1773 {
1774 struct array_buffer *trace_buf = &tr->array_buffer;
1775 struct array_buffer *max_buf = &tr->max_buffer;
1776 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1777 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1778
1779 max_buf->cpu = cpu;
1780 max_buf->time_start = data->preempt_timestamp;
1781
1782 max_data->saved_latency = tr->max_latency;
1783 max_data->critical_start = data->critical_start;
1784 max_data->critical_end = data->critical_end;
1785
1786 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1787 max_data->pid = tsk->pid;
1788 /*
1789 * If tsk == current, then use current_uid(), as that does not use
1790 * RCU. The irq tracer can be called out of RCU scope.
1791 */
1792 if (tsk == current)
1793 max_data->uid = current_uid();
1794 else
1795 max_data->uid = task_uid(tsk);
1796
1797 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1798 max_data->policy = tsk->policy;
1799 max_data->rt_priority = tsk->rt_priority;
1800
1801 /* record this task's comm */
1802 tracing_record_cmdline(tsk);
1803 latency_fsnotify(tr);
1804 }
1805
1806 /**
1807 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1808 * @tr: tracer
1809 * @tsk: the task with the latency
1810 * @cpu: The cpu that initiated the trace.
1811 * @cond_data: User data associated with a conditional snapshot
1812 *
1813 * Flip the buffers between the @tr and the max_tr and record information
1814 * about which task was the cause of this latency.
1815 */
1816 void
1817 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1818 void *cond_data)
1819 {
1820 if (tr->stop_count)
1821 return;
1822
1823 WARN_ON_ONCE(!irqs_disabled());
1824
1825 if (!tr->allocated_snapshot) {
1826 /* Only the nop tracer should hit this when disabling */
1827 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1828 return;
1829 }
1830
1831 arch_spin_lock(&tr->max_lock);
1832
1833 /* Inherit the recordable setting from array_buffer */
1834 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1835 ring_buffer_record_on(tr->max_buffer.buffer);
1836 else
1837 ring_buffer_record_off(tr->max_buffer.buffer);
1838
1839 #ifdef CONFIG_TRACER_SNAPSHOT
1840 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1841 goto out_unlock;
1842 #endif
1843 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1844
1845 __update_max_tr(tr, tsk, cpu);
1846
1847 out_unlock:
1848 arch_spin_unlock(&tr->max_lock);
1849 }
1850
1851 /**
1852 * update_max_tr_single - only copy one trace over, and reset the rest
1853 * @tr: tracer
1854 * @tsk: task with the latency
1855 * @cpu: the cpu of the buffer to copy.
1856 *
1857 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1858 */
1859 void
1860 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1861 {
1862 int ret;
1863
1864 if (tr->stop_count)
1865 return;
1866
1867 WARN_ON_ONCE(!irqs_disabled());
1868 if (!tr->allocated_snapshot) {
1869 /* Only the nop tracer should hit this when disabling */
1870 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1871 return;
1872 }
1873
1874 arch_spin_lock(&tr->max_lock);
1875
1876 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1877
1878 if (ret == -EBUSY) {
1879 /*
1880 * We failed to swap the buffer due to a commit taking
1881 * place on this CPU. We fail to record, but we reset
1882 * the max trace buffer (no one writes directly to it)
1883 * and flag that it failed.
1884 */
1885 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1886 "Failed to swap buffers due to commit in progress\n");
1887 }
1888
1889 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1890
1891 __update_max_tr(tr, tsk, cpu);
1892 arch_spin_unlock(&tr->max_lock);
1893 }
1894 #endif /* CONFIG_TRACER_MAX_TRACE */
1895
1896 static int wait_on_pipe(struct trace_iterator *iter, int full)
1897 {
1898 /* Iterators are static, they should be filled or empty */
1899 if (trace_buffer_iter(iter, iter->cpu_file))
1900 return 0;
1901
1902 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1903 full);
1904 }
1905
1906 #ifdef CONFIG_FTRACE_STARTUP_TEST
1907 static bool selftests_can_run;
1908
1909 struct trace_selftests {
1910 struct list_head list;
1911 struct tracer *type;
1912 };
1913
1914 static LIST_HEAD(postponed_selftests);
1915
1916 static int save_selftest(struct tracer *type)
1917 {
1918 struct trace_selftests *selftest;
1919
1920 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1921 if (!selftest)
1922 return -ENOMEM;
1923
1924 selftest->type = type;
1925 list_add(&selftest->list, &postponed_selftests);
1926 return 0;
1927 }
1928
1929 static int run_tracer_selftest(struct tracer *type)
1930 {
1931 struct trace_array *tr = &global_trace;
1932 struct tracer *saved_tracer = tr->current_trace;
1933 int ret;
1934
1935 if (!type->selftest || tracing_selftest_disabled)
1936 return 0;
1937
1938 /*
1939 * If a tracer registers early in boot up (before scheduling is
1940 * initialized and such), then do not run its selftests yet.
1941 * Instead, run them a little later in the boot process.
1942 */
1943 if (!selftests_can_run)
1944 return save_selftest(type);
1945
1946 if (!tracing_is_on()) {
1947 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1948 type->name);
1949 return 0;
1950 }
1951
1952 /*
1953 * Run a selftest on this tracer.
1954 * Here we reset the trace buffer, and set the current
1955 * tracer to be this tracer. The tracer can then run some
1956 * internal tracing to verify that everything is in order.
1957 * If we fail, we do not register this tracer.
1958 */
1959 tracing_reset_online_cpus(&tr->array_buffer);
1960
1961 tr->current_trace = type;
1962
1963 #ifdef CONFIG_TRACER_MAX_TRACE
1964 if (type->use_max_tr) {
1965 /* If we expanded the buffers, make sure the max is expanded too */
1966 if (ring_buffer_expanded)
1967 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1968 RING_BUFFER_ALL_CPUS);
1969 tr->allocated_snapshot = true;
1970 }
1971 #endif
1972
1973 /* the test is responsible for initializing and enabling */
1974 pr_info("Testing tracer %s: ", type->name);
1975 ret = type->selftest(type, tr);
1976 /* the test is responsible for resetting too */
1977 tr->current_trace = saved_tracer;
1978 if (ret) {
1979 printk(KERN_CONT "FAILED!\n");
1980 /* Add the warning after printing 'FAILED' */
1981 WARN_ON(1);
1982 return -1;
1983 }
1984 /* Only reset on passing, to avoid touching corrupted buffers */
1985 tracing_reset_online_cpus(&tr->array_buffer);
1986
1987 #ifdef CONFIG_TRACER_MAX_TRACE
1988 if (type->use_max_tr) {
1989 tr->allocated_snapshot = false;
1990
1991 /* Shrink the max buffer again */
1992 if (ring_buffer_expanded)
1993 ring_buffer_resize(tr->max_buffer.buffer, 1,
1994 RING_BUFFER_ALL_CPUS);
1995 }
1996 #endif
1997
1998 printk(KERN_CONT "PASSED\n");
1999 return 0;
2000 }
2001
2002 static __init int init_trace_selftests(void)
2003 {
2004 struct trace_selftests *p, *n;
2005 struct tracer *t, **last;
2006 int ret;
2007
2008 selftests_can_run = true;
2009
2010 mutex_lock(&trace_types_lock);
2011
2012 if (list_empty(&postponed_selftests))
2013 goto out;
2014
2015 pr_info("Running postponed tracer tests:\n");
2016
2017 tracing_selftest_running = true;
2018 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2019 /* This loop can take minutes when sanitizers are enabled, so
2020 * let's make sure we allow RCU processing.
2021 */
2022 cond_resched();
2023 ret = run_tracer_selftest(p->type);
2024 /* If the test fails, then warn and remove from available_tracers */
2025 if (ret < 0) {
2026 WARN(1, "tracer: %s failed selftest, disabling\n",
2027 p->type->name);
2028 last = &trace_types;
2029 for (t = trace_types; t; t = t->next) {
2030 if (t == p->type) {
2031 *last = t->next;
2032 break;
2033 }
2034 last = &t->next;
2035 }
2036 }
2037 list_del(&p->list);
2038 kfree(p);
2039 }
2040 tracing_selftest_running = false;
2041
2042 out:
2043 mutex_unlock(&trace_types_lock);
2044
2045 return 0;
2046 }
2047 core_initcall(init_trace_selftests);
2048 #else
2049 static inline int run_tracer_selftest(struct tracer *type)
2050 {
2051 return 0;
2052 }
2053 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2054
2055 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2056
2057 static void __init apply_trace_boot_options(void);
2058
2059 /**
2060 * register_tracer - register a tracer with the ftrace system.
2061 * @type: the plugin for the tracer
2062 *
2063 * Register a new plugin tracer.
2064 */
2065 int __init register_tracer(struct tracer *type)
2066 {
2067 struct tracer *t;
2068 int ret = 0;
2069
2070 if (!type->name) {
2071 pr_info("Tracer must have a name\n");
2072 return -1;
2073 }
2074
2075 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2076 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2077 return -1;
2078 }
2079
2080 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2081 pr_warn("Can not register tracer %s due to lockdown\n",
2082 type->name);
2083 return -EPERM;
2084 }
2085
2086 mutex_lock(&trace_types_lock);
2087
2088 tracing_selftest_running = true;
2089
2090 for (t = trace_types; t; t = t->next) {
2091 if (strcmp(type->name, t->name) == 0) {
2092 /* already found */
2093 pr_info("Tracer %s already registered\n",
2094 type->name);
2095 ret = -1;
2096 goto out;
2097 }
2098 }
2099
2100 if (!type->set_flag)
2101 type->set_flag = &dummy_set_flag;
2102 if (!type->flags) {
2103 /* allocate a dummy tracer_flags */
2104 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2105 if (!type->flags) {
2106 ret = -ENOMEM;
2107 goto out;
2108 }
2109 type->flags->val = 0;
2110 type->flags->opts = dummy_tracer_opt;
2111 } else
2112 if (!type->flags->opts)
2113 type->flags->opts = dummy_tracer_opt;
2114
2115 /* store the tracer for __set_tracer_option */
2116 type->flags->trace = type;
2117
2118 ret = run_tracer_selftest(type);
2119 if (ret < 0)
2120 goto out;
2121
2122 type->next = trace_types;
2123 trace_types = type;
2124 add_tracer_options(&global_trace, type);
2125
2126 out:
2127 tracing_selftest_running = false;
2128 mutex_unlock(&trace_types_lock);
2129
2130 if (ret || !default_bootup_tracer)
2131 goto out_unlock;
2132
2133 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2134 goto out_unlock;
2135
2136 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2137 /* Do we want this tracer to start on bootup? */
2138 tracing_set_tracer(&global_trace, type->name);
2139 default_bootup_tracer = NULL;
2140
2141 apply_trace_boot_options();
2142
2143 /* disable other selftests, since this will break them. */
2144 disable_tracing_selftest("running a tracer");
2145
2146 out_unlock:
2147 return ret;
2148 }
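/*
 * Illustrative sketch, kept under #if 0: roughly what a minimal tracer
 * plugin accepted by register_tracer() might look like.  The names
 * example_trace_init(), example_trace_reset() and example_tracer are
 * hypothetical; real tracers such as the nop tracer follow this shape and
 * register themselves from an __init function.
 */
#if 0
static int example_trace_init(struct trace_array *tr)
{
	/* arm whatever callbacks this tracer needs */
	return 0;
}

static void example_trace_reset(struct trace_array *tr)
{
	/* undo what example_trace_init() set up */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_trace_init,
	.reset	= example_trace_reset,
};

static __init int register_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(register_example_tracer);
#endif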
2149
2150 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2151 {
2152 struct trace_buffer *buffer = buf->buffer;
2153
2154 if (!buffer)
2155 return;
2156
2157 ring_buffer_record_disable(buffer);
2158
2159 /* Make sure all commits have finished */
2160 synchronize_rcu();
2161 ring_buffer_reset_cpu(buffer, cpu);
2162
2163 ring_buffer_record_enable(buffer);
2164 }
2165
2166 void tracing_reset_online_cpus(struct array_buffer *buf)
2167 {
2168 struct trace_buffer *buffer = buf->buffer;
2169
2170 if (!buffer)
2171 return;
2172
2173 ring_buffer_record_disable(buffer);
2174
2175 /* Make sure all commits have finished */
2176 synchronize_rcu();
2177
2178 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2179
2180 ring_buffer_reset_online_cpus(buffer);
2181
2182 ring_buffer_record_enable(buffer);
2183 }
2184
2185 /* Must have trace_types_lock held */
2186 void tracing_reset_all_online_cpus(void)
2187 {
2188 struct trace_array *tr;
2189
2190 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2191 if (!tr->clear_trace)
2192 continue;
2193 tr->clear_trace = false;
2194 tracing_reset_online_cpus(&tr->array_buffer);
2195 #ifdef CONFIG_TRACER_MAX_TRACE
2196 tracing_reset_online_cpus(&tr->max_buffer);
2197 #endif
2198 }
2199 }
2200
2201 /*
2202 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2203 * is the tgid last observed corresponding to pid=i.
2204 */
2205 static int *tgid_map;
2206
2207 /* The maximum valid index into tgid_map. */
2208 static size_t tgid_map_max;
2209
2210 #define SAVED_CMDLINES_DEFAULT 128
2211 #define NO_CMDLINE_MAP UINT_MAX
2212 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2213 struct saved_cmdlines_buffer {
2214 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2215 unsigned *map_cmdline_to_pid;
2216 unsigned cmdline_num;
2217 int cmdline_idx;
2218 char *saved_cmdlines;
2219 };
2220 static struct saved_cmdlines_buffer *savedcmd;
2221
2222 static inline char *get_saved_cmdlines(int idx)
2223 {
2224 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2225 }
2226
2227 static inline void set_cmdline(int idx, const char *cmdline)
2228 {
2229 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2230 }
2231
2232 static int allocate_cmdlines_buffer(unsigned int val,
2233 struct saved_cmdlines_buffer *s)
2234 {
2235 s->map_cmdline_to_pid = kmalloc_array(val,
2236 sizeof(*s->map_cmdline_to_pid),
2237 GFP_KERNEL);
2238 if (!s->map_cmdline_to_pid)
2239 return -ENOMEM;
2240
2241 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2242 if (!s->saved_cmdlines) {
2243 kfree(s->map_cmdline_to_pid);
2244 return -ENOMEM;
2245 }
2246
2247 s->cmdline_idx = 0;
2248 s->cmdline_num = val;
2249 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2250 sizeof(s->map_pid_to_cmdline));
2251 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2252 val * sizeof(*s->map_cmdline_to_pid));
2253
2254 return 0;
2255 }
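/*
 * Illustrative sketch of how the cmdline cache above fits together for a
 * hypothetical pid 1234 whose comm is "bash" (all values made up):
 *
 *	tpid = 1234 & (PID_MAX_DEFAULT - 1);
 *	idx  = savedcmd->map_pid_to_cmdline[tpid];	// slot in the cache
 *	savedcmd->map_cmdline_to_pid[idx] == 1234	// reverse mapping
 *	savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]	// holds "bash"
 *
 * The reverse map is what lets the lookup side detect that a slot has
 * since been reused by a different pid that hashed to the same tpid.
 */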
2256
2257 static int trace_create_savedcmd(void)
2258 {
2259 int ret;
2260
2261 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2262 if (!savedcmd)
2263 return -ENOMEM;
2264
2265 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2266 if (ret < 0) {
2267 kfree(savedcmd);
2268 savedcmd = NULL;
2269 return -ENOMEM;
2270 }
2271
2272 return 0;
2273 }
2274
2275 int is_tracing_stopped(void)
2276 {
2277 return global_trace.stop_count;
2278 }
2279
2280 /**
2281 * tracing_start - quick start of the tracer
2282 *
2283 * If tracing is enabled but was stopped by tracing_stop,
2284 * this will start the tracer back up.
2285 */
2286 void tracing_start(void)
2287 {
2288 struct trace_buffer *buffer;
2289 unsigned long flags;
2290
2291 if (tracing_disabled)
2292 return;
2293
2294 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2295 if (--global_trace.stop_count) {
2296 if (global_trace.stop_count < 0) {
2297 /* Someone screwed up their debugging */
2298 WARN_ON_ONCE(1);
2299 global_trace.stop_count = 0;
2300 }
2301 goto out;
2302 }
2303
2304 /* Prevent the buffers from switching */
2305 arch_spin_lock(&global_trace.max_lock);
2306
2307 buffer = global_trace.array_buffer.buffer;
2308 if (buffer)
2309 ring_buffer_record_enable(buffer);
2310
2311 #ifdef CONFIG_TRACER_MAX_TRACE
2312 buffer = global_trace.max_buffer.buffer;
2313 if (buffer)
2314 ring_buffer_record_enable(buffer);
2315 #endif
2316
2317 arch_spin_unlock(&global_trace.max_lock);
2318
2319 out:
2320 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2321 }
2322
2323 static void tracing_start_tr(struct trace_array *tr)
2324 {
2325 struct trace_buffer *buffer;
2326 unsigned long flags;
2327
2328 if (tracing_disabled)
2329 return;
2330
2331 /* If global, we need to also start the max tracer */
2332 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2333 return tracing_start();
2334
2335 raw_spin_lock_irqsave(&tr->start_lock, flags);
2336
2337 if (--tr->stop_count) {
2338 if (tr->stop_count < 0) {
2339 /* Someone screwed up their debugging */
2340 WARN_ON_ONCE(1);
2341 tr->stop_count = 0;
2342 }
2343 goto out;
2344 }
2345
2346 buffer = tr->array_buffer.buffer;
2347 if (buffer)
2348 ring_buffer_record_enable(buffer);
2349
2350 out:
2351 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2352 }
2353
2354 /**
2355 * tracing_stop - quick stop of the tracer
2356 *
2357 * Light weight way to stop tracing. Use in conjunction with
2358 * tracing_start.
2359 */
2360 void tracing_stop(void)
2361 {
2362 struct trace_buffer *buffer;
2363 unsigned long flags;
2364
2365 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2366 if (global_trace.stop_count++)
2367 goto out;
2368
2369 /* Prevent the buffers from switching */
2370 arch_spin_lock(&global_trace.max_lock);
2371
2372 buffer = global_trace.array_buffer.buffer;
2373 if (buffer)
2374 ring_buffer_record_disable(buffer);
2375
2376 #ifdef CONFIG_TRACER_MAX_TRACE
2377 buffer = global_trace.max_buffer.buffer;
2378 if (buffer)
2379 ring_buffer_record_disable(buffer);
2380 #endif
2381
2382 arch_spin_unlock(&global_trace.max_lock);
2383
2384 out:
2385 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2386 }
2387
2388 static void tracing_stop_tr(struct trace_array *tr)
2389 {
2390 struct trace_buffer *buffer;
2391 unsigned long flags;
2392
2393 /* If global, we need to also stop the max tracer */
2394 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2395 return tracing_stop();
2396
2397 raw_spin_lock_irqsave(&tr->start_lock, flags);
2398 if (tr->stop_count++)
2399 goto out;
2400
2401 buffer = tr->array_buffer.buffer;
2402 if (buffer)
2403 ring_buffer_record_disable(buffer);
2404
2405 out:
2406 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2407 }
2408
2409 static int trace_save_cmdline(struct task_struct *tsk)
2410 {
2411 unsigned tpid, idx;
2412
2413 /* treat recording of idle task as a success */
2414 if (!tsk->pid)
2415 return 1;
2416
2417 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2418
2419 /*
2420 * It's not the end of the world if we don't get
2421 * the lock, but we also don't want to spin
2422 * nor do we want to disable interrupts,
2423 * so if we miss here, then better luck next time.
2424 */
2425 if (!arch_spin_trylock(&trace_cmdline_lock))
2426 return 0;
2427
2428 idx = savedcmd->map_pid_to_cmdline[tpid];
2429 if (idx == NO_CMDLINE_MAP) {
2430 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2431
2432 savedcmd->map_pid_to_cmdline[tpid] = idx;
2433 savedcmd->cmdline_idx = idx;
2434 }
2435
2436 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2437 set_cmdline(idx, tsk->comm);
2438
2439 arch_spin_unlock(&trace_cmdline_lock);
2440
2441 return 1;
2442 }
2443
2444 static void __trace_find_cmdline(int pid, char comm[])
2445 {
2446 unsigned map;
2447 int tpid;
2448
2449 if (!pid) {
2450 strcpy(comm, "<idle>");
2451 return;
2452 }
2453
2454 if (WARN_ON_ONCE(pid < 0)) {
2455 strcpy(comm, "<XXX>");
2456 return;
2457 }
2458
2459 tpid = pid & (PID_MAX_DEFAULT - 1);
2460 map = savedcmd->map_pid_to_cmdline[tpid];
2461 if (map != NO_CMDLINE_MAP) {
2462 tpid = savedcmd->map_cmdline_to_pid[map];
2463 if (tpid == pid) {
2464 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2465 return;
2466 }
2467 }
2468 strcpy(comm, "<...>");
2469 }
2470
2471 void trace_find_cmdline(int pid, char comm[])
2472 {
2473 preempt_disable();
2474 arch_spin_lock(&trace_cmdline_lock);
2475
2476 __trace_find_cmdline(pid, comm);
2477
2478 arch_spin_unlock(&trace_cmdline_lock);
2479 preempt_enable();
2480 }
2481
2482 static int *trace_find_tgid_ptr(int pid)
2483 {
2484 /*
2485 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2486 * if we observe a non-NULL tgid_map then we also observe the correct
2487 * tgid_map_max.
2488 */
2489 int *map = smp_load_acquire(&tgid_map);
2490
2491 if (unlikely(!map || pid > tgid_map_max))
2492 return NULL;
2493
2494 return &map[pid];
2495 }
2496
2497 int trace_find_tgid(int pid)
2498 {
2499 int *ptr = trace_find_tgid_ptr(pid);
2500
2501 return ptr ? *ptr : 0;
2502 }
2503
2504 static int trace_save_tgid(struct task_struct *tsk)
2505 {
2506 int *ptr;
2507
2508 /* treat recording of idle task as a success */
2509 if (!tsk->pid)
2510 return 1;
2511
2512 ptr = trace_find_tgid_ptr(tsk->pid);
2513 if (!ptr)
2514 return 0;
2515
2516 *ptr = tsk->tgid;
2517 return 1;
2518 }
2519
2520 static bool tracing_record_taskinfo_skip(int flags)
2521 {
2522 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2523 return true;
2524 if (!__this_cpu_read(trace_taskinfo_save))
2525 return true;
2526 return false;
2527 }
2528
2529 /**
2530 * tracing_record_taskinfo - record the task info of a task
2531 *
2532 * @task: task to record
2533 * @flags: TRACE_RECORD_CMDLINE for recording comm
2534 * TRACE_RECORD_TGID for recording tgid
2535 */
2536 void tracing_record_taskinfo(struct task_struct *task, int flags)
2537 {
2538 bool done;
2539
2540 if (tracing_record_taskinfo_skip(flags))
2541 return;
2542
2543 /*
2544 * Record as much task information as possible. If some fail, continue
2545 * to try to record the others.
2546 */
2547 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2548 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2549
2550 /* If recording any information failed, retry again soon. */
2551 if (!done)
2552 return;
2553
2554 __this_cpu_write(trace_taskinfo_save, false);
2555 }
2556
2557 /**
2558 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2559 *
2560 * @prev: previous task during sched_switch
2561 * @next: next task during sched_switch
2562 * @flags: TRACE_RECORD_CMDLINE for recording comm
2563 * TRACE_RECORD_TGID for recording tgid
2564 */
2565 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2566 struct task_struct *next, int flags)
2567 {
2568 bool done;
2569
2570 if (tracing_record_taskinfo_skip(flags))
2571 return;
2572
2573 /*
2574 * Record as much task information as possible. If some fail, continue
2575 * to try to record the others.
2576 */
2577 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2578 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2579 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2580 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2581
2582 /* If recording any information failed, retry again soon. */
2583 if (!done)
2584 return;
2585
2586 __this_cpu_write(trace_taskinfo_save, false);
2587 }
2588
2589 /* Helpers to record specific task information */
2590 void tracing_record_cmdline(struct task_struct *task)
2591 {
2592 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2593 }
2594
2595 void tracing_record_tgid(struct task_struct *task)
2596 {
2597 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2598 }
2599
2600 /*
2601 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2602 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2603 * simplifies those functions and keeps them in sync.
2604 */
2605 enum print_line_t trace_handle_return(struct trace_seq *s)
2606 {
2607 return trace_seq_has_overflowed(s) ?
2608 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2609 }
2610 EXPORT_SYMBOL_GPL(trace_handle_return);
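/*
 * Illustrative sketch (under #if 0): the usual way an event's output
 * callback uses trace_handle_return().  The event type and the printed
 * fields are hypothetical.
 */
#if 0
static enum print_line_t
example_event_print(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example value=%d\n", 42);

	/* Collapse the overflow check into a single return value */
	return trace_handle_return(s);
}
#endif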
2611
2612 static unsigned short migration_disable_value(void)
2613 {
2614 #if defined(CONFIG_SMP)
2615 return current->migration_disabled;
2616 #else
2617 return 0;
2618 #endif
2619 }
2620
2621 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2622 {
2623 unsigned int trace_flags = irqs_status;
2624 unsigned int pc;
2625
2626 pc = preempt_count();
2627
2628 if (pc & NMI_MASK)
2629 trace_flags |= TRACE_FLAG_NMI;
2630 if (pc & HARDIRQ_MASK)
2631 trace_flags |= TRACE_FLAG_HARDIRQ;
2632 if (in_serving_softirq())
2633 trace_flags |= TRACE_FLAG_SOFTIRQ;
2634
2635 if (tif_need_resched())
2636 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2637 if (test_preempt_need_resched())
2638 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2639 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2640 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2641 }
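/*
 * Illustrative sketch (under #if 0) of how the value packed above can be
 * unpacked; the helper name example_unpack_ctx() is made up.  Bits 0-3
 * carry the (capped) preemption depth, bits 4-7 the (capped)
 * migration-disable depth, and the TRACE_FLAG_* bits sit at bit 16 and up.
 */
#if 0
static void example_unpack_ctx(unsigned int trace_ctx)
{
	unsigned int preempt_depth = trace_ctx & 0xf;
	unsigned int migrate_depth = (trace_ctx >> 4) & 0xf;
	unsigned int flags = trace_ctx >> 16;

	pr_debug("preempt=%u migrate=%u hardirq=%d\n",
		 preempt_depth, migrate_depth,
		 !!(flags & TRACE_FLAG_HARDIRQ));
}
#endif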
2642
2643 struct ring_buffer_event *
2644 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2645 int type,
2646 unsigned long len,
2647 unsigned int trace_ctx)
2648 {
2649 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2650 }
2651
2652 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2653 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2654 static int trace_buffered_event_ref;
2655
2656 /**
2657 * trace_buffered_event_enable - enable buffering events
2658 *
2659 * When events are being filtered, it is quicker to use a temporary
2660 * buffer to write the event data into if there's a likely chance
2661 * that it will not be committed. Discarding an event from the
2662 * ring buffer is not as fast as committing it, and is much slower
2663 * than copying the data and then committing.
2664 *
2665 * When an event is to be filtered, allocate per cpu buffers to
2666 * write the event data into, and if the event is filtered and discarded
2667 * it is simply dropped, otherwise, the entire data is to be committed
2668 * in one shot.
2669 */
2670 void trace_buffered_event_enable(void)
2671 {
2672 struct ring_buffer_event *event;
2673 struct page *page;
2674 int cpu;
2675
2676 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2677
2678 if (trace_buffered_event_ref++)
2679 return;
2680
2681 for_each_tracing_cpu(cpu) {
2682 page = alloc_pages_node(cpu_to_node(cpu),
2683 GFP_KERNEL | __GFP_NORETRY, 0);
2684 if (!page)
2685 goto failed;
2686
2687 event = page_address(page);
2688 memset(event, 0, sizeof(*event));
2689
2690 per_cpu(trace_buffered_event, cpu) = event;
2691
2692 preempt_disable();
2693 if (cpu == smp_processor_id() &&
2694 __this_cpu_read(trace_buffered_event) !=
2695 per_cpu(trace_buffered_event, cpu))
2696 WARN_ON_ONCE(1);
2697 preempt_enable();
2698 }
2699
2700 return;
2701 failed:
2702 trace_buffered_event_disable();
2703 }
2704
2705 static void enable_trace_buffered_event(void *data)
2706 {
2707 /* Probably not needed, but do it anyway */
2708 smp_rmb();
2709 this_cpu_dec(trace_buffered_event_cnt);
2710 }
2711
2712 static void disable_trace_buffered_event(void *data)
2713 {
2714 this_cpu_inc(trace_buffered_event_cnt);
2715 }
2716
2717 /**
2718 * trace_buffered_event_disable - disable buffering events
2719 *
2720 * When a filter is removed, it is faster to not use the buffered
2721 * events, and to commit directly into the ring buffer. Free up
2722 * the temp buffers when there are no more users. This requires
2723 * special synchronization with current events.
2724 */
2725 void trace_buffered_event_disable(void)
2726 {
2727 int cpu;
2728
2729 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2730
2731 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2732 return;
2733
2734 if (--trace_buffered_event_ref)
2735 return;
2736
2737 preempt_disable();
2738 /* For each CPU, set the buffer as used. */
2739 smp_call_function_many(tracing_buffer_mask,
2740 disable_trace_buffered_event, NULL, 1);
2741 preempt_enable();
2742
2743 /* Wait for all current users to finish */
2744 synchronize_rcu();
2745
2746 for_each_tracing_cpu(cpu) {
2747 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2748 per_cpu(trace_buffered_event, cpu) = NULL;
2749 }
2750 /*
2751 * Make sure trace_buffered_event is NULL before clearing
2752 * trace_buffered_event_cnt.
2753 */
2754 smp_wmb();
2755
2756 preempt_disable();
2757 /* Do the work on each cpu */
2758 smp_call_function_many(tracing_buffer_mask,
2759 enable_trace_buffered_event, NULL, 1);
2760 preempt_enable();
2761 }
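/*
 * Illustrative sketch (under #if 0) of how a caller that installs and
 * removes an event filter might pair the two helpers above.  The
 * functions example_install_filter() and example_remove_filter() are
 * hypothetical; the real callers live in the event filter code and hold
 * event_mutex, as the WARN_ON_ONCE() checks require.
 */
#if 0
static void example_install_filter(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... attach the filter to @file ... */
	mutex_unlock(&event_mutex);
}

static void example_remove_filter(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	/* ... detach the filter from @file ... */
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif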
2762
2763 static struct trace_buffer *temp_buffer;
2764
2765 struct ring_buffer_event *
2766 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2767 struct trace_event_file *trace_file,
2768 int type, unsigned long len,
2769 unsigned int trace_ctx)
2770 {
2771 struct ring_buffer_event *entry;
2772 struct trace_array *tr = trace_file->tr;
2773 int val;
2774
2775 *current_rb = tr->array_buffer.buffer;
2776
2777 if (!tr->no_filter_buffering_ref &&
2778 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2779 (entry = this_cpu_read(trace_buffered_event))) {
2780 /*
2781 * Filtering is on, so try to use the per cpu buffer first.
2782 * This buffer will simulate a ring_buffer_event,
2783 * where the type_len is zero and the array[0] will
2784 * hold the full length.
2785 * (see include/linux/ring_buffer.h for details on
2786 * how the ring_buffer_event is structured).
2787 *
2788 * Using a temp buffer during filtering and copying it
2789 * on a matched filter is quicker than writing directly
2790 * into the ring buffer and then discarding it when
2791 * it doesn't match. That is because the discard
2792 * requires several atomic operations to get right.
2793 * Copying on match and doing nothing on a failed match
2794 * is still quicker than skipping the copy on match but
2795 * having to discard out of the ring buffer on a failed match.
2796 */
2797 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2798
2799 val = this_cpu_inc_return(trace_buffered_event_cnt);
2800
2801 /*
2802 * Preemption is disabled, but interrupts and NMIs
2803 * can still come in now. If that happens after
2804 * the above increment, then it will have to go
2805 * back to the old method of allocating the event
2806 * on the ring buffer, and if the filter fails, it
2807 * will have to call ring_buffer_discard_commit()
2808 * to remove it.
2809 *
2810 * Need to also check the unlikely case that the
2811 * length is bigger than the temp buffer size.
2812 * If that happens, then the reserve is pretty much
2813 * guaranteed to fail, as the ring buffer currently
2814 * only allows events less than a page. But that may
2815 * change in the future, so let the ring buffer reserve
2816 * handle the failure in that case.
2817 */
2818 if (val == 1 && likely(len <= max_len)) {
2819 trace_event_setup(entry, type, trace_ctx);
2820 entry->array[0] = len;
2821 return entry;
2822 }
2823 this_cpu_dec(trace_buffered_event_cnt);
2824 }
2825
2826 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2827 trace_ctx);
2828 /*
2829 * If tracing is off, but we have triggers enabled,
2830 * we still need to look at the event data. Use the temp_buffer
2831 * to store the trace event for the trigger to use. It's recursion
2832 * safe and will not be recorded anywhere.
2833 */
2834 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2835 *current_rb = temp_buffer;
2836 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2837 trace_ctx);
2838 }
2839 return entry;
2840 }
2841 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
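/*
 * Illustrative sketch of the "simulated" ring_buffer_event that lives in
 * the per-cpu trace_buffered_event page used above (field values are an
 * example only):
 *
 *	event->type_len = 0;	// length does not fit in type_len
 *	event->array[0] = len;	// full payload length in bytes
 *	event->array[1..]	// the event payload itself
 *
 * ring_buffer_event_data() and ring_buffer_event_length() already know
 * how to handle this layout, so the filter code can treat the temp buffer
 * roughly like an event reserved in the real ring buffer.
 */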
2842
2843 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2844 static DEFINE_MUTEX(tracepoint_printk_mutex);
2845
2846 static void output_printk(struct trace_event_buffer *fbuffer)
2847 {
2848 struct trace_event_call *event_call;
2849 struct trace_event_file *file;
2850 struct trace_event *event;
2851 unsigned long flags;
2852 struct trace_iterator *iter = tracepoint_print_iter;
2853
2854 /* We should never get here if iter is NULL */
2855 if (WARN_ON_ONCE(!iter))
2856 return;
2857
2858 event_call = fbuffer->trace_file->event_call;
2859 if (!event_call || !event_call->event.funcs ||
2860 !event_call->event.funcs->trace)
2861 return;
2862
2863 file = fbuffer->trace_file;
2864 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2865 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2866 !filter_match_preds(file->filter, fbuffer->entry)))
2867 return;
2868
2869 event = &fbuffer->trace_file->event_call->event;
2870
2871 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2872 trace_seq_init(&iter->seq);
2873 iter->ent = fbuffer->entry;
2874 event_call->event.funcs->trace(iter, 0, event);
2875 trace_seq_putc(&iter->seq, 0);
2876 printk("%s", iter->seq.buffer);
2877
2878 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2879 }
2880
2881 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2882 void *buffer, size_t *lenp,
2883 loff_t *ppos)
2884 {
2885 int save_tracepoint_printk;
2886 int ret;
2887
2888 mutex_lock(&tracepoint_printk_mutex);
2889 save_tracepoint_printk = tracepoint_printk;
2890
2891 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2892
2893 /*
2894 * This will force exiting early, as tracepoint_printk
2895 * is always zero when tracepoint_print_iter is not allocated.
2896 */
2897 if (!tracepoint_print_iter)
2898 tracepoint_printk = 0;
2899
2900 if (save_tracepoint_printk == tracepoint_printk)
2901 goto out;
2902
2903 if (tracepoint_printk)
2904 static_key_enable(&tracepoint_printk_key.key);
2905 else
2906 static_key_disable(&tracepoint_printk_key.key);
2907
2908 out:
2909 mutex_unlock(&tracepoint_printk_mutex);
2910
2911 return ret;
2912 }
2913
2914 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2915 {
2916 enum event_trigger_type tt = ETT_NONE;
2917 struct trace_event_file *file = fbuffer->trace_file;
2918
2919 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2920 fbuffer->entry, &tt))
2921 goto discard;
2922
2923 if (static_key_false(&tracepoint_printk_key.key))
2924 output_printk(fbuffer);
2925
2926 if (static_branch_unlikely(&trace_event_exports_enabled))
2927 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2928
2929 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2930 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2931
2932 discard:
2933 if (tt)
2934 event_triggers_post_call(file, tt);
2935
2936 }
2937 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2938
2939 /*
2940 * Skip 3:
2941 *
2942 * trace_buffer_unlock_commit_regs()
2943 * trace_event_buffer_commit()
2944 * trace_event_raw_event_xxx()
2945 */
2946 # define STACK_SKIP 3
2947
2948 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2949 struct trace_buffer *buffer,
2950 struct ring_buffer_event *event,
2951 unsigned int trace_ctx,
2952 struct pt_regs *regs)
2953 {
2954 __buffer_unlock_commit(buffer, event);
2955
2956 /*
2957 * If regs is not set, then skip the necessary functions.
2958 * Note, we can still get here via blktrace, wakeup tracer
2959 * and mmiotrace, but that's ok if they lose a function or
2960 * two. They are not that meaningful.
2961 */
2962 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2963 ftrace_trace_userstack(tr, buffer, trace_ctx);
2964 }
2965
2966 /*
2967 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2968 */
2969 void
2970 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2971 struct ring_buffer_event *event)
2972 {
2973 __buffer_unlock_commit(buffer, event);
2974 }
2975
2976 void
2977 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2978 parent_ip, unsigned int trace_ctx)
2979 {
2980 struct trace_event_call *call = &event_function;
2981 struct trace_buffer *buffer = tr->array_buffer.buffer;
2982 struct ring_buffer_event *event;
2983 struct ftrace_entry *entry;
2984
2985 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2986 trace_ctx);
2987 if (!event)
2988 return;
2989 entry = ring_buffer_event_data(event);
2990 entry->ip = ip;
2991 entry->parent_ip = parent_ip;
2992
2993 if (!call_filter_check_discard(call, entry, buffer, event)) {
2994 if (static_branch_unlikely(&trace_function_exports_enabled))
2995 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2996 __buffer_unlock_commit(buffer, event);
2997 }
2998 }
2999
3000 #ifdef CONFIG_STACKTRACE
3001
3002 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3003 #define FTRACE_KSTACK_NESTING 4
3004
3005 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3006
3007 struct ftrace_stack {
3008 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3009 };
3010
3011
3012 struct ftrace_stacks {
3013 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3014 };
3015
3016 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3017 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3018
3019 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3020 unsigned int trace_ctx,
3021 int skip, struct pt_regs *regs)
3022 {
3023 struct trace_event_call *call = &event_kernel_stack;
3024 struct ring_buffer_event *event;
3025 unsigned int size, nr_entries;
3026 struct ftrace_stack *fstack;
3027 struct stack_entry *entry;
3028 int stackidx;
3029
3030 /*
3031 * Add one for this function and the call to save_stack_trace().
3032 * If regs is set, then these functions will not be in the way.
3033 */
3034 #ifndef CONFIG_UNWINDER_ORC
3035 if (!regs)
3036 skip++;
3037 #endif
3038
3039 preempt_disable_notrace();
3040
3041 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3042
3043 /* This should never happen. If it does, yell once and skip */
3044 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3045 goto out;
3046
3047 /*
3048 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3049 * interrupt will either see the value pre increment or post
3050 * increment. If the interrupt happens pre increment it will have
3051 * restored the counter when it returns. We just need a barrier to
3052 * keep gcc from moving things around.
3053 */
3054 barrier();
3055
3056 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3057 size = ARRAY_SIZE(fstack->calls);
3058
3059 if (regs) {
3060 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3061 size, skip);
3062 } else {
3063 nr_entries = stack_trace_save(fstack->calls, size, skip);
3064 }
3065
3066 size = nr_entries * sizeof(unsigned long);
3067 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3068 (sizeof(*entry) - sizeof(entry->caller)) + size,
3069 trace_ctx);
3070 if (!event)
3071 goto out;
3072 entry = ring_buffer_event_data(event);
3073
3074 memcpy(&entry->caller, fstack->calls, size);
3075 entry->size = nr_entries;
3076
3077 if (!call_filter_check_discard(call, entry, buffer, event))
3078 __buffer_unlock_commit(buffer, event);
3079
3080 out:
3081 /* Again, don't let gcc optimize things here */
3082 barrier();
3083 __this_cpu_dec(ftrace_stack_reserve);
3084 preempt_enable_notrace();
3085
3086 }
3087
3088 static inline void ftrace_trace_stack(struct trace_array *tr,
3089 struct trace_buffer *buffer,
3090 unsigned int trace_ctx,
3091 int skip, struct pt_regs *regs)
3092 {
3093 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3094 return;
3095
3096 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3097 }
3098
3099 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3100 int skip)
3101 {
3102 struct trace_buffer *buffer = tr->array_buffer.buffer;
3103
3104 if (rcu_is_watching()) {
3105 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3106 return;
3107 }
3108
3109 /*
3110 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3111 * but if the above rcu_is_watching() failed, then the NMI
3112 * triggered someplace critical, and rcu_irq_enter() should
3113 * not be called from NMI.
3114 */
3115 if (unlikely(in_nmi()))
3116 return;
3117
3118 rcu_irq_enter_irqson();
3119 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3120 rcu_irq_exit_irqson();
3121 }
3122
3123 /**
3124 * trace_dump_stack - record a stack back trace in the trace buffer
3125 * @skip: Number of functions to skip (helper handlers)
3126 */
3127 void trace_dump_stack(int skip)
3128 {
3129 if (tracing_disabled || tracing_selftest_running)
3130 return;
3131
3132 #ifndef CONFIG_UNWINDER_ORC
3133 /* Skip 1 to skip this function. */
3134 skip++;
3135 #endif
3136 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3137 tracing_gen_ctx(), skip, NULL);
3138 }
3139 EXPORT_SYMBOL_GPL(trace_dump_stack);
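/*
 * Illustrative sketch (under #if 0): trace_dump_stack() is typically
 * dropped into code being debugged to record how a given point was
 * reached.  The function example_debug_hook() is made up.
 */
#if 0
static void example_debug_hook(void)
{
	/* Record the current kernel stack trace into the trace buffer */
	trace_dump_stack(0);
}
#endif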
3140
3141 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3142 static DEFINE_PER_CPU(int, user_stack_count);
3143
3144 static void
3145 ftrace_trace_userstack(struct trace_array *tr,
3146 struct trace_buffer *buffer, unsigned int trace_ctx)
3147 {
3148 struct trace_event_call *call = &event_user_stack;
3149 struct ring_buffer_event *event;
3150 struct userstack_entry *entry;
3151
3152 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3153 return;
3154
3155 /*
3156 * NMIs cannot handle page faults, even with fixups.
3157 * Saving the user stack can (and often does) fault.
3158 */
3159 if (unlikely(in_nmi()))
3160 return;
3161
3162 /*
3163 * prevent recursion, since the user stack tracing may
3164 * trigger other kernel events.
3165 */
3166 preempt_disable();
3167 if (__this_cpu_read(user_stack_count))
3168 goto out;
3169
3170 __this_cpu_inc(user_stack_count);
3171
3172 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3173 sizeof(*entry), trace_ctx);
3174 if (!event)
3175 goto out_drop_count;
3176 entry = ring_buffer_event_data(event);
3177
3178 entry->tgid = current->tgid;
3179 memset(&entry->caller, 0, sizeof(entry->caller));
3180
3181 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3182 if (!call_filter_check_discard(call, entry, buffer, event))
3183 __buffer_unlock_commit(buffer, event);
3184
3185 out_drop_count:
3186 __this_cpu_dec(user_stack_count);
3187 out:
3188 preempt_enable();
3189 }
3190 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3191 static void ftrace_trace_userstack(struct trace_array *tr,
3192 struct trace_buffer *buffer,
3193 unsigned int trace_ctx)
3194 {
3195 }
3196 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3197
3198 #endif /* CONFIG_STACKTRACE */
3199
3200 static inline void
3201 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3202 unsigned long long delta)
3203 {
3204 entry->bottom_delta_ts = delta & U32_MAX;
3205 entry->top_delta_ts = (delta >> 32);
3206 }
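/*
 * Illustrative sketch: the 64-bit delta split above is reassembled on the
 * output side as
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 *
 * which recovers the full timestamp gap between the repeated calls.
 */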
3207
3208 void trace_last_func_repeats(struct trace_array *tr,
3209 struct trace_func_repeats *last_info,
3210 unsigned int trace_ctx)
3211 {
3212 struct trace_buffer *buffer = tr->array_buffer.buffer;
3213 struct func_repeats_entry *entry;
3214 struct ring_buffer_event *event;
3215 u64 delta;
3216
3217 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3218 sizeof(*entry), trace_ctx);
3219 if (!event)
3220 return;
3221
3222 delta = ring_buffer_event_time_stamp(buffer, event) -
3223 last_info->ts_last_call;
3224
3225 entry = ring_buffer_event_data(event);
3226 entry->ip = last_info->ip;
3227 entry->parent_ip = last_info->parent_ip;
3228 entry->count = last_info->count;
3229 func_repeats_set_delta_ts(entry, delta);
3230
3231 __buffer_unlock_commit(buffer, event);
3232 }
3233
3234 /* created for use with alloc_percpu */
3235 struct trace_buffer_struct {
3236 int nesting;
3237 char buffer[4][TRACE_BUF_SIZE];
3238 };
3239
3240 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3241
3242 /*
3243 * This allows for lockless recording. If we're nested too deeply, then
3244 * this returns NULL.
3245 */
3246 static char *get_trace_buf(void)
3247 {
3248 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3249
3250 if (!trace_percpu_buffer || buffer->nesting >= 4)
3251 return NULL;
3252
3253 buffer->nesting++;
3254
3255 /* Interrupts must see nesting incremented before we use the buffer */
3256 barrier();
3257 return &buffer->buffer[buffer->nesting - 1][0];
3258 }
3259
3260 static void put_trace_buf(void)
3261 {
3262 /* Don't let the decrement of nesting leak before this */
3263 barrier();
3264 this_cpu_dec(trace_percpu_buffer->nesting);
3265 }
3266
3267 static int alloc_percpu_trace_buffer(void)
3268 {
3269 struct trace_buffer_struct __percpu *buffers;
3270
3271 if (trace_percpu_buffer)
3272 return 0;
3273
3274 buffers = alloc_percpu(struct trace_buffer_struct);
3275 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3276 return -ENOMEM;
3277
3278 trace_percpu_buffer = buffers;
3279 return 0;
3280 }
3281
3282 static int buffers_allocated;
3283
3284 void trace_printk_init_buffers(void)
3285 {
3286 if (buffers_allocated)
3287 return;
3288
3289 if (alloc_percpu_trace_buffer())
3290 return;
3291
3292 /* trace_printk() is for debug use only. Don't use it in production. */
3293
3294 pr_warn("\n");
3295 pr_warn("**********************************************************\n");
3296 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3297 pr_warn("** **\n");
3298 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3299 pr_warn("** **\n");
3300 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3301 pr_warn("** unsafe for production use. **\n");
3302 pr_warn("** **\n");
3303 pr_warn("** If you see this message and you are not debugging **\n");
3304 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3305 pr_warn("** **\n");
3306 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3307 pr_warn("**********************************************************\n");
3308
3309 /* Expand the buffers to set size */
3310 tracing_update_buffers();
3311
3312 buffers_allocated = 1;
3313
3314 /*
3315 * trace_printk_init_buffers() can be called by modules.
3316 * If that happens, then we need to start cmdline recording
3317 * directly here. If the global_trace.buffer is already
3318 * allocated here, then this was called by module code.
3319 */
3320 if (global_trace.array_buffer.buffer)
3321 tracing_start_cmdline_record();
3322 }
3323 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3324
3325 void trace_printk_start_comm(void)
3326 {
3327 /* Start tracing comms if trace printk is set */
3328 if (!buffers_allocated)
3329 return;
3330 tracing_start_cmdline_record();
3331 }
3332
3333 static void trace_printk_start_stop_comm(int enabled)
3334 {
3335 if (!buffers_allocated)
3336 return;
3337
3338 if (enabled)
3339 tracing_start_cmdline_record();
3340 else
3341 tracing_stop_cmdline_record();
3342 }
3343
3344 /**
3345 * trace_vbprintk - write binary msg to tracing buffer
3346 * @ip: The address of the caller
3347 * @fmt: The string format to write to the buffer
3348 * @args: Arguments for @fmt
3349 */
3350 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3351 {
3352 struct trace_event_call *call = &event_bprint;
3353 struct ring_buffer_event *event;
3354 struct trace_buffer *buffer;
3355 struct trace_array *tr = &global_trace;
3356 struct bprint_entry *entry;
3357 unsigned int trace_ctx;
3358 char *tbuffer;
3359 int len = 0, size;
3360
3361 if (unlikely(tracing_selftest_running || tracing_disabled))
3362 return 0;
3363
3364 /* Don't pollute graph traces with trace_vprintk internals */
3365 pause_graph_tracing();
3366
3367 trace_ctx = tracing_gen_ctx();
3368 preempt_disable_notrace();
3369
3370 tbuffer = get_trace_buf();
3371 if (!tbuffer) {
3372 len = 0;
3373 goto out_nobuffer;
3374 }
3375
3376 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3377
3378 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3379 goto out_put;
3380
3381 size = sizeof(*entry) + sizeof(u32) * len;
3382 buffer = tr->array_buffer.buffer;
3383 ring_buffer_nest_start(buffer);
3384 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3385 trace_ctx);
3386 if (!event)
3387 goto out;
3388 entry = ring_buffer_event_data(event);
3389 entry->ip = ip;
3390 entry->fmt = fmt;
3391
3392 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3393 if (!call_filter_check_discard(call, entry, buffer, event)) {
3394 __buffer_unlock_commit(buffer, event);
3395 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3396 }
3397
3398 out:
3399 ring_buffer_nest_end(buffer);
3400 out_put:
3401 put_trace_buf();
3402
3403 out_nobuffer:
3404 preempt_enable_notrace();
3405 unpause_graph_tracing();
3406
3407 return len;
3408 }
3409 EXPORT_SYMBOL_GPL(trace_vbprintk);
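/*
 * Illustrative sketch (under #if 0): a debugging call like the one below
 * reaches trace_vbprintk() through the trace_printk() machinery, which
 * records only the format pointer plus the binary arguments.  The values
 * nr_bytes and name are hypothetical.
 */
#if 0
static void example_debug_print(int nr_bytes, const char *name)
{
	trace_printk("read %d bytes from %s\n", nr_bytes, name);
}
#endif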
3410
3411 __printf(3, 0)
3412 static int
3413 __trace_array_vprintk(struct trace_buffer *buffer,
3414 unsigned long ip, const char *fmt, va_list args)
3415 {
3416 struct trace_event_call *call = &event_print;
3417 struct ring_buffer_event *event;
3418 int len = 0, size;
3419 struct print_entry *entry;
3420 unsigned int trace_ctx;
3421 char *tbuffer;
3422
3423 if (tracing_disabled || tracing_selftest_running)
3424 return 0;
3425
3426 /* Don't pollute graph traces with trace_vprintk internals */
3427 pause_graph_tracing();
3428
3429 trace_ctx = tracing_gen_ctx();
3430 preempt_disable_notrace();
3431
3432
3433 tbuffer = get_trace_buf();
3434 if (!tbuffer) {
3435 len = 0;
3436 goto out_nobuffer;
3437 }
3438
3439 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3440
3441 size = sizeof(*entry) + len + 1;
3442 ring_buffer_nest_start(buffer);
3443 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3444 trace_ctx);
3445 if (!event)
3446 goto out;
3447 entry = ring_buffer_event_data(event);
3448 entry->ip = ip;
3449
3450 memcpy(&entry->buf, tbuffer, len + 1);
3451 if (!call_filter_check_discard(call, entry, buffer, event)) {
3452 __buffer_unlock_commit(buffer, event);
3453 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3454 }
3455
3456 out:
3457 ring_buffer_nest_end(buffer);
3458 put_trace_buf();
3459
3460 out_nobuffer:
3461 preempt_enable_notrace();
3462 unpause_graph_tracing();
3463
3464 return len;
3465 }
3466
3467 __printf(3, 0)
3468 int trace_array_vprintk(struct trace_array *tr,
3469 unsigned long ip, const char *fmt, va_list args)
3470 {
3471 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3472 }
3473
3474 /**
3475 * trace_array_printk - Print a message to a specific instance
3476 * @tr: The instance trace_array descriptor
3477 * @ip: The instruction pointer that this is called from.
3478 * @fmt: The format to print (printf format)
3479 *
3480 * If a subsystem sets up its own instance, it has the right to
3481 * printk strings into its tracing instance buffer using this
3482 * function. Note, this function will not write into the top level
3483 * buffer (use trace_printk() for that), as writing into the top level
3484 * buffer should only have events that can be individually disabled.
3485 * trace_printk() is only used for debugging a kernel, and should
3486 * never be incorporated into normal use.
3487 *
3488 * trace_array_printk() can be used, as it will not add noise to the
3489 * top level tracing buffer.
3490 *
3491 * Note, trace_array_init_printk() must be called on @tr before this
3492 * can be used.
3493 */
3494 __printf(3, 0)
3495 int trace_array_printk(struct trace_array *tr,
3496 unsigned long ip, const char *fmt, ...)
3497 {
3498 int ret;
3499 va_list ap;
3500
3501 if (!tr)
3502 return -ENOENT;
3503
3504 /* This is only allowed for created instances */
3505 if (tr == &global_trace)
3506 return 0;
3507
3508 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3509 return 0;
3510
3511 va_start(ap, fmt);
3512 ret = trace_array_vprintk(tr, ip, fmt, ap);
3513 va_end(ap);
3514 return ret;
3515 }
3516 EXPORT_SYMBOL_GPL(trace_array_printk);
3517
3518 /**
3519 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3520 * @tr: The trace array to initialize the buffers for
3521 *
3522 * As trace_array_printk() only writes into instances, they are OK to
3523 * have in the kernel (unlike trace_printk()). This needs to be called
3524 * before trace_array_printk() can be used on a trace_array.
3525 */
3526 int trace_array_init_printk(struct trace_array *tr)
3527 {
3528 if (!tr)
3529 return -ENOENT;
3530
3531 /* This is only allowed for created instances */
3532 if (tr == &global_trace)
3533 return -EINVAL;
3534
3535 return alloc_percpu_trace_buffer();
3536 }
3537 EXPORT_SYMBOL_GPL(trace_array_init_printk);
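/*
 * Illustrative sketch (under #if 0) of how a subsystem with its own
 * tracing instance might use the two helpers above.  The instance name
 * "example" and the function example_instance_log() are made up.
 */
#if 0
static struct trace_array *example_tr;

static int example_instance_log(int value)
{
	if (!example_tr) {
		example_tr = trace_array_get_by_name("example");
		if (!example_tr)
			return -ENOMEM;
		if (trace_array_init_printk(example_tr))
			return -ENOMEM;
	}

	return trace_array_printk(example_tr, _THIS_IP_,
				  "example value: %d\n", value);
}
#endif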
3538
3539 __printf(3, 4)
3540 int trace_array_printk_buf(struct trace_buffer *buffer,
3541 unsigned long ip, const char *fmt, ...)
3542 {
3543 int ret;
3544 va_list ap;
3545
3546 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3547 return 0;
3548
3549 va_start(ap, fmt);
3550 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3551 va_end(ap);
3552 return ret;
3553 }
3554
3555 __printf(2, 0)
3556 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3557 {
3558 return trace_array_vprintk(&global_trace, ip, fmt, args);
3559 }
3560 EXPORT_SYMBOL_GPL(trace_vprintk);
3561
3562 static void trace_iterator_increment(struct trace_iterator *iter)
3563 {
3564 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3565
3566 iter->idx++;
3567 if (buf_iter)
3568 ring_buffer_iter_advance(buf_iter);
3569 }
3570
3571 static struct trace_entry *
3572 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3573 unsigned long *lost_events)
3574 {
3575 struct ring_buffer_event *event;
3576 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3577
3578 if (buf_iter) {
3579 event = ring_buffer_iter_peek(buf_iter, ts);
3580 if (lost_events)
3581 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3582 (unsigned long)-1 : 0;
3583 } else {
3584 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3585 lost_events);
3586 }
3587
3588 if (event) {
3589 iter->ent_size = ring_buffer_event_length(event);
3590 return ring_buffer_event_data(event);
3591 }
3592 iter->ent_size = 0;
3593 return NULL;
3594 }
3595
3596 static struct trace_entry *
3597 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3598 unsigned long *missing_events, u64 *ent_ts)
3599 {
3600 struct trace_buffer *buffer = iter->array_buffer->buffer;
3601 struct trace_entry *ent, *next = NULL;
3602 unsigned long lost_events = 0, next_lost = 0;
3603 int cpu_file = iter->cpu_file;
3604 u64 next_ts = 0, ts;
3605 int next_cpu = -1;
3606 int next_size = 0;
3607 int cpu;
3608
3609 /*
3610 * If we are in a per_cpu trace file, don't bother iterating over
3611 * all CPUs; just peek at that CPU directly.
3612 */
3613 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3614 if (ring_buffer_empty_cpu(buffer, cpu_file))
3615 return NULL;
3616 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3617 if (ent_cpu)
3618 *ent_cpu = cpu_file;
3619
3620 return ent;
3621 }
3622
3623 for_each_tracing_cpu(cpu) {
3624
3625 if (ring_buffer_empty_cpu(buffer, cpu))
3626 continue;
3627
3628 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3629
3630 /*
3631 * Pick the entry with the smallest timestamp:
3632 */
3633 if (ent && (!next || ts < next_ts)) {
3634 next = ent;
3635 next_cpu = cpu;
3636 next_ts = ts;
3637 next_lost = lost_events;
3638 next_size = iter->ent_size;
3639 }
3640 }
3641
3642 iter->ent_size = next_size;
3643
3644 if (ent_cpu)
3645 *ent_cpu = next_cpu;
3646
3647 if (ent_ts)
3648 *ent_ts = next_ts;
3649
3650 if (missing_events)
3651 *missing_events = next_lost;
3652
3653 return next;
3654 }
3655
3656 #define STATIC_FMT_BUF_SIZE 128
3657 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3658
3659 static char *trace_iter_expand_format(struct trace_iterator *iter)
3660 {
3661 char *tmp;
3662
3663 /*
3664 * iter->tr is NULL when used with tp_printk, which makes
3665 * this get called where it is not safe to call krealloc().
3666 */
3667 if (!iter->tr || iter->fmt == static_fmt_buf)
3668 return NULL;
3669
3670 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3671 GFP_KERNEL);
3672 if (tmp) {
3673 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3674 iter->fmt = tmp;
3675 }
3676
3677 return tmp;
3678 }
3679
3680 /* Returns true if the string is safe to dereference from an event */
3681 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3682 bool star, int len)
3683 {
3684 unsigned long addr = (unsigned long)str;
3685 struct trace_event *trace_event;
3686 struct trace_event_call *event;
3687
3688 /* Ignore strings with no length */
3689 if (star && !len)
3690 return true;
3691
3692 /* OK if part of the event data */
3693 if ((addr >= (unsigned long)iter->ent) &&
3694 (addr < (unsigned long)iter->ent + iter->ent_size))
3695 return true;
3696
3697 /* OK if part of the temp seq buffer */
3698 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3699 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3700 return true;
3701
3702 /* Core rodata can not be freed */
3703 if (is_kernel_rodata(addr))
3704 return true;
3705
3706 if (trace_is_tracepoint_string(str))
3707 return true;
3708
3709 /*
3710 * Now this could be a module event, referencing core module
3711 * data, which is OK.
3712 */
3713 if (!iter->ent)
3714 return false;
3715
3716 trace_event = ftrace_find_event(iter->ent->type);
3717 if (!trace_event)
3718 return false;
3719
3720 event = container_of(trace_event, struct trace_event_call, event);
3721 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3722 return false;
3723
3724 /* Would rather have rodata, but this will suffice */
3725 if (within_module_core(addr, event->module))
3726 return true;
3727
3728 return false;
3729 }
3730
3731 static const char *show_buffer(struct trace_seq *s)
3732 {
3733 struct seq_buf *seq = &s->seq;
3734
3735 seq_buf_terminate(seq);
3736
3737 return seq->buffer;
3738 }
3739
3740 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3741
3742 static int test_can_verify_check(const char *fmt, ...)
3743 {
3744 char buf[16];
3745 va_list ap;
3746 int ret;
3747
3748 /*
3749 * The verifier depends on vsnprintf() modifying the va_list that is
3750 * passed to it, i.e. on the va_list being passed by reference. Some
3751 * architectures (like x86_32) pass it by value, which means that
3752 * vsnprintf() does not modify the va_list passed to it, and the
3753 * verifier would then need to be able to understand all the values
3754 * that vsnprintf can use. If it is passed by value, then the
3755 * verifier is disabled.
3756 */
3757 va_start(ap, fmt);
3758 vsnprintf(buf, 16, "%d", ap);
3759 ret = va_arg(ap, int);
3760 va_end(ap);
3761
3762 return ret;
3763 }
3764
3765 static void test_can_verify(void)
3766 {
3767 if (!test_can_verify_check("%d %d", 0, 1)) {
3768 pr_info("trace event string verifier disabled\n");
3769 static_branch_inc(&trace_no_verify);
3770 }
3771 }
3772
3773 /**
3774 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3775 * @iter: The iterator that holds the seq buffer and the event being printed
3776 * @fmt: The format used to print the event
3777 * @ap: The va_list holding the data to print from @fmt.
3778 *
3779 * This writes the data into the @iter->seq buffer using the data from
3780 * @fmt and @ap. If the format has a %s, then the source of the string
3781 * is examined to make sure it is safe to print, otherwise it will
3782 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3783 * pointer.
3784 */
3785 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3786 va_list ap)
3787 {
3788 const char *p = fmt;
3789 const char *str;
3790 int i, j;
3791
3792 if (WARN_ON_ONCE(!fmt))
3793 return;
3794
3795 if (static_branch_unlikely(&trace_no_verify))
3796 goto print;
3797
3798 /* Don't bother checking when doing a ftrace_dump() */
3799 if (iter->fmt == static_fmt_buf)
3800 goto print;
3801
3802 while (*p) {
3803 bool star = false;
3804 int len = 0;
3805
3806 j = 0;
3807
3808 /* We only care about %s and variants */
3809 for (i = 0; p[i]; i++) {
3810 if (i + 1 >= iter->fmt_size) {
3811 /*
3812 * If we can't expand the copy buffer,
3813 * just print it.
3814 */
3815 if (!trace_iter_expand_format(iter))
3816 goto print;
3817 }
3818
3819 if (p[i] == '\\' && p[i+1]) {
3820 i++;
3821 continue;
3822 }
3823 if (p[i] == '%') {
3824 /* Need to test cases like %08.*s */
3825 for (j = 1; p[i+j]; j++) {
3826 if (isdigit(p[i+j]) ||
3827 p[i+j] == '.')
3828 continue;
3829 if (p[i+j] == '*') {
3830 star = true;
3831 continue;
3832 }
3833 break;
3834 }
3835 if (p[i+j] == 's')
3836 break;
3837 star = false;
3838 }
3839 j = 0;
3840 }
3841 /* If no %s found then just print normally */
3842 if (!p[i])
3843 break;
3844
3845 /* Copy up to the %s, and print that */
3846 strncpy(iter->fmt, p, i);
3847 iter->fmt[i] = '\0';
3848 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3849
3850 /*
3851 * If iter->seq is full, the above call no longer guarantees
3852 * that ap is in sync with fmt processing, and further calls
3853 * to va_arg() can return wrong positional arguments.
3854 *
3855 * Ensure that ap is no longer used in this case.
3856 */
3857 if (iter->seq.full) {
3858 p = "";
3859 break;
3860 }
3861
3862 if (star)
3863 len = va_arg(ap, int);
3864
3865 /* The ap now points to the string data of the %s */
3866 str = va_arg(ap, const char *);
3867
3868 /*
3869 * If you hit this warning, it is likely that the
3870 * trace event in question used %s on a string that
3871 * was saved at the time of the event, but may not be
3872 * around when the trace is read. Use __string(),
3873 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3874 * instead. See samples/trace_events/trace-events-sample.h
3875 * for reference.
3876 */
3877 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3878 "fmt: '%s' current_buffer: '%s'",
3879 fmt, show_buffer(&iter->seq))) {
3880 int ret;
3881
3882 /* Try to safely read the string */
3883 if (star) {
3884 if (len + 1 > iter->fmt_size)
3885 len = iter->fmt_size - 1;
3886 if (len < 0)
3887 len = 0;
3888 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3889 iter->fmt[len] = 0;
3890 star = false;
3891 } else {
3892 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3893 iter->fmt_size);
3894 }
3895 if (ret < 0)
3896 trace_seq_printf(&iter->seq, "(0x%px)", str);
3897 else
3898 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3899 str, iter->fmt);
3900 str = "[UNSAFE-MEMORY]";
3901 strcpy(iter->fmt, "%s");
3902 } else {
3903 strncpy(iter->fmt, p + i, j + 1);
3904 iter->fmt[j+1] = '\0';
3905 }
3906 if (star)
3907 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3908 else
3909 trace_seq_printf(&iter->seq, iter->fmt, str);
3910
3911 p += i + j + 1;
3912 }
3913 print:
3914 if (*p)
3915 trace_seq_vprintf(&iter->seq, p, ap);
3916 }
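/*
 * Illustrative sketch (under #if 0) of the TRACE_EVENT() pattern that the
 * warning above points people to.  Instead of recording a bare pointer
 * and printing it later with %s, the string is copied into the event with
 * __string()/__assign_str() and printed with __get_str().  The event name
 * and arguments are hypothetical, and the definition belongs in a trace
 * header; see samples/trace_events/trace-events-sample.h for a complete
 * example.
 */
#if 0
TRACE_EVENT(example_open,
	TP_PROTO(const char *filename),
	TP_ARGS(filename),
	TP_STRUCT__entry(
		__string(filename, filename)
	),
	TP_fast_assign(
		__assign_str(filename, filename);
	),
	TP_printk("file=%s", __get_str(filename))
);
#endif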
3917
3918 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3919 {
3920 const char *p, *new_fmt;
3921 char *q;
3922
3923 if (WARN_ON_ONCE(!fmt))
3924 return fmt;
3925
3926 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3927 return fmt;
3928
3929 p = fmt;
3930 new_fmt = q = iter->fmt;
3931 while (*p) {
3932 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3933 if (!trace_iter_expand_format(iter))
3934 return fmt;
3935
3936 q += iter->fmt - new_fmt;
3937 new_fmt = iter->fmt;
3938 }
3939
3940 *q++ = *p++;
3941
3942 /* Replace %p with %px */
3943 if (p[-1] == '%') {
3944 if (p[0] == '%') {
3945 *q++ = *p++;
3946 } else if (p[0] == 'p' && !isalnum(p[1])) {
3947 *q++ = *p++;
3948 *q++ = 'x';
3949 }
3950 }
3951 }
3952 *q = '\0';
3953
3954 return new_fmt;
3955 }
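/*
 * For illustration (a sketch derived from the loop above, not a comment
 * from the original file): with TRACE_ITER_HASH_PTR cleared, a format
 * such as
 *
 *   "ptr=%p val=%d msg=%s"   becomes   "ptr=%px val=%d msg=%s"
 *
 * while "%%p" and pointer extensions like "%ps" are left untouched,
 * because of the isalnum() check on the character following 'p'.
 */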
3956
3957 #define STATIC_TEMP_BUF_SIZE 128
3958 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3959
3960 /* Find the next real entry, without updating the iterator itself */
3961 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3962 int *ent_cpu, u64 *ent_ts)
3963 {
3964 /* __find_next_entry will reset ent_size */
3965 int ent_size = iter->ent_size;
3966 struct trace_entry *entry;
3967
3968 /*
3969 * If called from ftrace_dump(), then the iter->temp buffer
3970 * will be the static_temp_buf and not created from kmalloc.
3971 * If the entry size is greater than the buffer, we can
3972 * not save it. Just return NULL in that case. This is only
3973 * used to add markers when two consecutive events' time
3974 * stamps have a large delta. See trace_print_lat_context()
3975 */
3976 if (iter->temp == static_temp_buf &&
3977 STATIC_TEMP_BUF_SIZE < ent_size)
3978 return NULL;
3979
3980 /*
3981 * __find_next_entry() may call peek_next_entry(), which may call
3982 * ring_buffer_peek(), and that can leave the contents of iter->ent
3983 * undefined. Copy iter->ent now.
3984 */
3985 if (iter->ent && iter->ent != iter->temp) {
3986 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3987 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3988 void *temp;
3989 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3990 if (!temp)
3991 return NULL;
3992 kfree(iter->temp);
3993 iter->temp = temp;
3994 iter->temp_size = iter->ent_size;
3995 }
3996 memcpy(iter->temp, iter->ent, iter->ent_size);
3997 iter->ent = iter->temp;
3998 }
3999 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4000 /* Put back the original ent_size */
4001 iter->ent_size = ent_size;
4002
4003 return entry;
4004 }
4005
4006 /* Find the next real entry, and increment the iterator to the next entry */
4007 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4008 {
4009 iter->ent = __find_next_entry(iter, &iter->cpu,
4010 &iter->lost_events, &iter->ts);
4011
4012 if (iter->ent)
4013 trace_iterator_increment(iter);
4014
4015 return iter->ent ? iter : NULL;
4016 }
4017
4018 static void trace_consume(struct trace_iterator *iter)
4019 {
4020 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4021 &iter->lost_events);
4022 }
4023
4024 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4025 {
4026 struct trace_iterator *iter = m->private;
4027 int i = (int)*pos;
4028 void *ent;
4029
4030 WARN_ON_ONCE(iter->leftover);
4031
4032 (*pos)++;
4033
4034 /* can't go backwards */
4035 if (iter->idx > i)
4036 return NULL;
4037
4038 if (iter->idx < 0)
4039 ent = trace_find_next_entry_inc(iter);
4040 else
4041 ent = iter;
4042
4043 while (ent && iter->idx < i)
4044 ent = trace_find_next_entry_inc(iter);
4045
4046 iter->pos = *pos;
4047
4048 return ent;
4049 }
4050
4051 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4052 {
4053 struct ring_buffer_iter *buf_iter;
4054 unsigned long entries = 0;
4055 u64 ts;
4056
4057 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4058
4059 buf_iter = trace_buffer_iter(iter, cpu);
4060 if (!buf_iter)
4061 return;
4062
4063 ring_buffer_iter_reset(buf_iter);
4064
4065 /*
4066 * With the max latency tracers, it is possible that a reset never
4067 * took place on a cpu. This shows up as timestamps that are before
4068 * the start of the buffer.
4069 */
4070 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4071 if (ts >= iter->array_buffer->time_start)
4072 break;
4073 entries++;
4074 ring_buffer_iter_advance(buf_iter);
4075 }
4076
4077 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4078 }
4079
4080 /*
4081 * The current tracer is copied to avoid taking a global lock
4082 * all around.
4083 */
4084 static void *s_start(struct seq_file *m, loff_t *pos)
4085 {
4086 struct trace_iterator *iter = m->private;
4087 struct trace_array *tr = iter->tr;
4088 int cpu_file = iter->cpu_file;
4089 void *p = NULL;
4090 loff_t l = 0;
4091 int cpu;
4092
4093 /*
4094 * Copy the tracer to avoid using a global lock all around.
4095 * iter->trace is a copy of current_trace; the name pointer may be
4096 * compared directly instead of using strcmp(), as iter->trace->name
4097 * will point to the same string as current_trace->name.
4098 */
4099 mutex_lock(&trace_types_lock);
4100 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4101 *iter->trace = *tr->current_trace;
4102 mutex_unlock(&trace_types_lock);
4103
4104 #ifdef CONFIG_TRACER_MAX_TRACE
4105 if (iter->snapshot && iter->trace->use_max_tr)
4106 return ERR_PTR(-EBUSY);
4107 #endif
4108
4109 if (*pos != iter->pos) {
4110 iter->ent = NULL;
4111 iter->cpu = 0;
4112 iter->idx = -1;
4113
4114 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4115 for_each_tracing_cpu(cpu)
4116 tracing_iter_reset(iter, cpu);
4117 } else
4118 tracing_iter_reset(iter, cpu_file);
4119
4120 iter->leftover = 0;
4121 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4122 ;
4123
4124 } else {
4125 /*
4126 * If we overflowed the seq_file before, then we want
4127 * to just reuse the trace_seq buffer again.
4128 */
4129 if (iter->leftover)
4130 p = iter;
4131 else {
4132 l = *pos - 1;
4133 p = s_next(m, p, &l);
4134 }
4135 }
4136
4137 trace_event_read_lock();
4138 trace_access_lock(cpu_file);
4139 return p;
4140 }
4141
4142 static void s_stop(struct seq_file *m, void *p)
4143 {
4144 struct trace_iterator *iter = m->private;
4145
4146 #ifdef CONFIG_TRACER_MAX_TRACE
4147 if (iter->snapshot && iter->trace->use_max_tr)
4148 return;
4149 #endif
4150
4151 trace_access_unlock(iter->cpu_file);
4152 trace_event_read_unlock();
4153 }
4154
4155 static void
4156 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4157 unsigned long *entries, int cpu)
4158 {
4159 unsigned long count;
4160
4161 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4162 /*
4163 * If this buffer has skipped entries, then we hold all
4164 * entries for the trace and we need to ignore the
4165 * ones before the buffer's start timestamp.
4166 */
4167 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4168 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4169 /* total is the same as the entries */
4170 *total = count;
4171 } else
4172 *total = count +
4173 ring_buffer_overrun_cpu(buf->buffer, cpu);
4174 *entries = count;
4175 }
4176
4177 static void
4178 get_total_entries(struct array_buffer *buf,
4179 unsigned long *total, unsigned long *entries)
4180 {
4181 unsigned long t, e;
4182 int cpu;
4183
4184 *total = 0;
4185 *entries = 0;
4186
4187 for_each_tracing_cpu(cpu) {
4188 get_total_entries_cpu(buf, &t, &e, cpu);
4189 *total += t;
4190 *entries += e;
4191 }
4192 }
4193
4194 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4195 {
4196 unsigned long total, entries;
4197
4198 if (!tr)
4199 tr = &global_trace;
4200
4201 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4202
4203 return entries;
4204 }
4205
4206 unsigned long trace_total_entries(struct trace_array *tr)
4207 {
4208 unsigned long total, entries;
4209
4210 if (!tr)
4211 tr = &global_trace;
4212
4213 get_total_entries(&tr->array_buffer, &total, &entries);
4214
4215 return entries;
4216 }
4217
4218 static void print_lat_help_header(struct seq_file *m)
4219 {
4220 seq_puts(m, "# _------=> CPU# \n"
4221 "# / _-----=> irqs-off \n"
4222 "# | / _----=> need-resched \n"
4223 "# || / _---=> hardirq/softirq \n"
4224 "# ||| / _--=> preempt-depth \n"
4225 "# |||| / _-=> migrate-disable \n"
4226 "# ||||| / delay \n"
4227 "# cmd pid |||||| time | caller \n"
4228 "# \\ / |||||| \\ | / \n");
4229 }
4230
4231 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4232 {
4233 unsigned long total;
4234 unsigned long entries;
4235
4236 get_total_entries(buf, &total, &entries);
4237 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4238 entries, total, num_online_cpus());
4239 seq_puts(m, "#\n");
4240 }
4241
4242 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4243 unsigned int flags)
4244 {
4245 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4246
4247 print_event_info(buf, m);
4248
4249 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4250 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4251 }
4252
4253 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4254 unsigned int flags)
4255 {
4256 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4257 const char *space = " ";
4258 int prec = tgid ? 12 : 2;
4259
4260 print_event_info(buf, m);
4261
4262 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
4263 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4264 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4265 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4266 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4267 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4268 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4269 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4270 }
4271
4272 void
4273 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4274 {
4275 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4276 struct array_buffer *buf = iter->array_buffer;
4277 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4278 struct tracer *type = iter->trace;
4279 unsigned long entries;
4280 unsigned long total;
4281 const char *name = type->name;
4284
4285 get_total_entries(buf, &total, &entries);
4286
4287 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4288 name, UTS_RELEASE);
4289 seq_puts(m, "# -----------------------------------"
4290 "---------------------------------\n");
4291 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4292 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4293 nsecs_to_usecs(data->saved_latency),
4294 entries,
4295 total,
4296 buf->cpu,
4297 #if defined(CONFIG_PREEMPT_NONE)
4298 "server",
4299 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4300 "desktop",
4301 #elif defined(CONFIG_PREEMPT)
4302 "preempt",
4303 #elif defined(CONFIG_PREEMPT_RT)
4304 "preempt_rt",
4305 #else
4306 "unknown",
4307 #endif
4308 /* These are reserved for later use */
4309 0, 0, 0, 0);
4310 #ifdef CONFIG_SMP
4311 seq_printf(m, " #P:%d)\n", num_online_cpus());
4312 #else
4313 seq_puts(m, ")\n");
4314 #endif
4315 seq_puts(m, "# -----------------\n");
4316 seq_printf(m, "# | task: %.16s-%d "
4317 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4318 data->comm, data->pid,
4319 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4320 data->policy, data->rt_priority);
4321 seq_puts(m, "# -----------------\n");
4322
4323 if (data->critical_start) {
4324 seq_puts(m, "# => started at: ");
4325 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4326 trace_print_seq(m, &iter->seq);
4327 seq_puts(m, "\n# => ended at: ");
4328 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4329 trace_print_seq(m, &iter->seq);
4330 seq_puts(m, "\n#\n");
4331 }
4332
4333 seq_puts(m, "#\n");
4334 }
4335
4336 static void test_cpu_buff_start(struct trace_iterator *iter)
4337 {
4338 struct trace_seq *s = &iter->seq;
4339 struct trace_array *tr = iter->tr;
4340
4341 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4342 return;
4343
4344 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4345 return;
4346
4347 if (cpumask_available(iter->started) &&
4348 cpumask_test_cpu(iter->cpu, iter->started))
4349 return;
4350
4351 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4352 return;
4353
4354 if (cpumask_available(iter->started))
4355 cpumask_set_cpu(iter->cpu, iter->started);
4356
4357 /* Don't print started cpu buffer for the first entry of the trace */
4358 if (iter->idx > 1)
4359 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4360 iter->cpu);
4361 }
4362
4363 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4364 {
4365 struct trace_array *tr = iter->tr;
4366 struct trace_seq *s = &iter->seq;
4367 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4368 struct trace_entry *entry;
4369 struct trace_event *event;
4370
4371 entry = iter->ent;
4372
4373 test_cpu_buff_start(iter);
4374
4375 event = ftrace_find_event(entry->type);
4376
4377 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4378 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4379 trace_print_lat_context(iter);
4380 else
4381 trace_print_context(iter);
4382 }
4383
4384 if (trace_seq_has_overflowed(s))
4385 return TRACE_TYPE_PARTIAL_LINE;
4386
4387 if (event)
4388 return event->funcs->trace(iter, sym_flags, event);
4389
4390 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4391
4392 return trace_handle_return(s);
4393 }
4394
4395 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4396 {
4397 struct trace_array *tr = iter->tr;
4398 struct trace_seq *s = &iter->seq;
4399 struct trace_entry *entry;
4400 struct trace_event *event;
4401
4402 entry = iter->ent;
4403
4404 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4405 trace_seq_printf(s, "%d %d %llu ",
4406 entry->pid, iter->cpu, iter->ts);
4407
4408 if (trace_seq_has_overflowed(s))
4409 return TRACE_TYPE_PARTIAL_LINE;
4410
4411 event = ftrace_find_event(entry->type);
4412 if (event)
4413 return event->funcs->raw(iter, 0, event);
4414
4415 trace_seq_printf(s, "%d ?\n", entry->type);
4416
4417 return trace_handle_return(s);
4418 }
4419
4420 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4421 {
4422 struct trace_array *tr = iter->tr;
4423 struct trace_seq *s = &iter->seq;
4424 unsigned char newline = '\n';
4425 struct trace_entry *entry;
4426 struct trace_event *event;
4427
4428 entry = iter->ent;
4429
4430 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4431 SEQ_PUT_HEX_FIELD(s, entry->pid);
4432 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4433 SEQ_PUT_HEX_FIELD(s, iter->ts);
4434 if (trace_seq_has_overflowed(s))
4435 return TRACE_TYPE_PARTIAL_LINE;
4436 }
4437
4438 event = ftrace_find_event(entry->type);
4439 if (event) {
4440 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4441 if (ret != TRACE_TYPE_HANDLED)
4442 return ret;
4443 }
4444
4445 SEQ_PUT_FIELD(s, newline);
4446
4447 return trace_handle_return(s);
4448 }
4449
4450 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4451 {
4452 struct trace_array *tr = iter->tr;
4453 struct trace_seq *s = &iter->seq;
4454 struct trace_entry *entry;
4455 struct trace_event *event;
4456
4457 entry = iter->ent;
4458
4459 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4460 SEQ_PUT_FIELD(s, entry->pid);
4461 SEQ_PUT_FIELD(s, iter->cpu);
4462 SEQ_PUT_FIELD(s, iter->ts);
4463 if (trace_seq_has_overflowed(s))
4464 return TRACE_TYPE_PARTIAL_LINE;
4465 }
4466
4467 event = ftrace_find_event(entry->type);
4468 return event ? event->funcs->binary(iter, 0, event) :
4469 TRACE_TYPE_HANDLED;
4470 }
4471
4472 int trace_empty(struct trace_iterator *iter)
4473 {
4474 struct ring_buffer_iter *buf_iter;
4475 int cpu;
4476
4477 /* If we are looking at one CPU buffer, only check that one */
4478 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4479 cpu = iter->cpu_file;
4480 buf_iter = trace_buffer_iter(iter, cpu);
4481 if (buf_iter) {
4482 if (!ring_buffer_iter_empty(buf_iter))
4483 return 0;
4484 } else {
4485 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4486 return 0;
4487 }
4488 return 1;
4489 }
4490
4491 for_each_tracing_cpu(cpu) {
4492 buf_iter = trace_buffer_iter(iter, cpu);
4493 if (buf_iter) {
4494 if (!ring_buffer_iter_empty(buf_iter))
4495 return 0;
4496 } else {
4497 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4498 return 0;
4499 }
4500 }
4501
4502 return 1;
4503 }
4504
4505 /* Called with trace_event_read_lock() held. */
4506 enum print_line_t print_trace_line(struct trace_iterator *iter)
4507 {
4508 struct trace_array *tr = iter->tr;
4509 unsigned long trace_flags = tr->trace_flags;
4510 enum print_line_t ret;
4511
4512 if (iter->lost_events) {
4513 if (iter->lost_events == (unsigned long)-1)
4514 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4515 iter->cpu);
4516 else
4517 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4518 iter->cpu, iter->lost_events);
4519 if (trace_seq_has_overflowed(&iter->seq))
4520 return TRACE_TYPE_PARTIAL_LINE;
4521 }
4522
4523 if (iter->trace && iter->trace->print_line) {
4524 ret = iter->trace->print_line(iter);
4525 if (ret != TRACE_TYPE_UNHANDLED)
4526 return ret;
4527 }
4528
4529 if (iter->ent->type == TRACE_BPUTS &&
4530 trace_flags & TRACE_ITER_PRINTK &&
4531 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532 return trace_print_bputs_msg_only(iter);
4533
4534 if (iter->ent->type == TRACE_BPRINT &&
4535 trace_flags & TRACE_ITER_PRINTK &&
4536 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537 return trace_print_bprintk_msg_only(iter);
4538
4539 if (iter->ent->type == TRACE_PRINT &&
4540 trace_flags & TRACE_ITER_PRINTK &&
4541 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542 return trace_print_printk_msg_only(iter);
4543
4544 if (trace_flags & TRACE_ITER_BIN)
4545 return print_bin_fmt(iter);
4546
4547 if (trace_flags & TRACE_ITER_HEX)
4548 return print_hex_fmt(iter);
4549
4550 if (trace_flags & TRACE_ITER_RAW)
4551 return print_raw_fmt(iter);
4552
4553 return print_trace_fmt(iter);
4554 }
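/*
 * Usage sketch (an illustration, not taken from this file): the bin, hex
 * and raw branches above are selected through trace_options, e.g.
 *
 *   # echo hex > /sys/kernel/tracing/trace_options
 *
 * switches readers of the trace file to the hex output, and "raw" or
 * "bin" pick the other two formats handled here; the default is the
 * formatted output from print_trace_fmt().
 */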
4555
4556 void trace_latency_header(struct seq_file *m)
4557 {
4558 struct trace_iterator *iter = m->private;
4559 struct trace_array *tr = iter->tr;
4560
4561 /* print nothing if the buffers are empty */
4562 if (trace_empty(iter))
4563 return;
4564
4565 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4566 print_trace_header(m, iter);
4567
4568 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4569 print_lat_help_header(m);
4570 }
4571
4572 void trace_default_header(struct seq_file *m)
4573 {
4574 struct trace_iterator *iter = m->private;
4575 struct trace_array *tr = iter->tr;
4576 unsigned long trace_flags = tr->trace_flags;
4577
4578 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4579 return;
4580
4581 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4582 /* print nothing if the buffers are empty */
4583 if (trace_empty(iter))
4584 return;
4585 print_trace_header(m, iter);
4586 if (!(trace_flags & TRACE_ITER_VERBOSE))
4587 print_lat_help_header(m);
4588 } else {
4589 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4590 if (trace_flags & TRACE_ITER_IRQ_INFO)
4591 print_func_help_header_irq(iter->array_buffer,
4592 m, trace_flags);
4593 else
4594 print_func_help_header(iter->array_buffer, m,
4595 trace_flags);
4596 }
4597 }
4598 }
4599
4600 static void test_ftrace_alive(struct seq_file *m)
4601 {
4602 if (!ftrace_is_dead())
4603 return;
4604 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4605 "# MAY BE MISSING FUNCTION EVENTS\n");
4606 }
4607
4608 #ifdef CONFIG_TRACER_MAX_TRACE
4609 static void show_snapshot_main_help(struct seq_file *m)
4610 {
4611 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4612 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4613 "# Takes a snapshot of the main buffer.\n"
4614 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4615 "# (Doesn't have to be '2'; works with any number that\n"
4616 "# is not a '0' or '1')\n");
4617 }
4618
4619 static void show_snapshot_percpu_help(struct seq_file *m)
4620 {
4621 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4622 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4623 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4624 "# Takes a snapshot of the main buffer for this cpu.\n");
4625 #else
4626 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4627 "# Must use main snapshot file to allocate.\n");
4628 #endif
4629 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4630 "# (Doesn't have to be '2'; works with any number that\n"
4631 "# is not a '0' or '1')\n");
4632 }
4633
4634 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4635 {
4636 if (iter->tr->allocated_snapshot)
4637 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4638 else
4639 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4640
4641 seq_puts(m, "# Snapshot commands:\n");
4642 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4643 show_snapshot_main_help(m);
4644 else
4645 show_snapshot_percpu_help(m);
4646 }
4647 #else
4648 /* Should never be called */
4649 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4650 #endif
4651
4652 static int s_show(struct seq_file *m, void *v)
4653 {
4654 struct trace_iterator *iter = v;
4655 int ret;
4656
4657 if (iter->ent == NULL) {
4658 if (iter->tr) {
4659 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4660 seq_puts(m, "#\n");
4661 test_ftrace_alive(m);
4662 }
4663 if (iter->snapshot && trace_empty(iter))
4664 print_snapshot_help(m, iter);
4665 else if (iter->trace && iter->trace->print_header)
4666 iter->trace->print_header(m);
4667 else
4668 trace_default_header(m);
4669
4670 } else if (iter->leftover) {
4671 /*
4672 * If we filled the seq_file buffer earlier, we
4673 * want to just show it now.
4674 */
4675 ret = trace_print_seq(m, &iter->seq);
4676
4677 /* ret should this time be zero, but you never know */
4678 iter->leftover = ret;
4679
4680 } else {
4681 print_trace_line(iter);
4682 ret = trace_print_seq(m, &iter->seq);
4683 /*
4684 * If we overflow the seq_file buffer, then it will
4685 * ask us for this data again at start up.
4686 * Use that instead.
4687 * ret is 0 if seq_file write succeeded.
4688 * -1 otherwise.
4689 */
4690 iter->leftover = ret;
4691 }
4692
4693 return 0;
4694 }
4695
4696 /*
4697 * Should be used after trace_array_get(); trace_types_lock
4698 * ensures that i_cdev was already initialized.
4699 */
4700 static inline int tracing_get_cpu(struct inode *inode)
4701 {
4702 if (inode->i_cdev) /* See trace_create_cpu_file() */
4703 return (long)inode->i_cdev - 1;
4704 return RING_BUFFER_ALL_CPUS;
4705 }
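/*
 * Sketch of the encoding assumed above: the per-cpu file creation side
 * (trace_create_cpu_file()) is expected to store "cpu + 1" in i_cdev,
 * so a NULL i_cdev decodes to RING_BUFFER_ALL_CPUS while, for example,
 * i_cdev == (void *)3 decodes to CPU 2.
 */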
4706
4707 static const struct seq_operations tracer_seq_ops = {
4708 .start = s_start,
4709 .next = s_next,
4710 .stop = s_stop,
4711 .show = s_show,
4712 };
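/*
 * A rough sketch (not a strict guarantee of call counts) of how reads of
 * the "trace" file drive these callbacks through the seq_file core:
 *
 *   s_start() -> s_show() -> s_next() -> s_show() -> ... -> s_stop()
 *
 * with s_start()/s_stop() also taking and releasing the event read lock
 * and the per-cpu access lock around each chunk of output.
 */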
4713
4714 static struct trace_iterator *
4715 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4716 {
4717 struct trace_array *tr = inode->i_private;
4718 struct trace_iterator *iter;
4719 int cpu;
4720
4721 if (tracing_disabled)
4722 return ERR_PTR(-ENODEV);
4723
4724 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4725 if (!iter)
4726 return ERR_PTR(-ENOMEM);
4727
4728 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4729 GFP_KERNEL);
4730 if (!iter->buffer_iter)
4731 goto release;
4732
4733 /*
4734 * trace_find_next_entry() may need to save off iter->ent.
4735 * It will place it into the iter->temp buffer. As most
4736 * events are smaller than 128 bytes, allocate a buffer of that size.
4737 * If one is greater, then trace_find_next_entry() will
4738 * allocate a new buffer to adjust for the bigger iter->ent.
4739 * It's not critical if it fails to get allocated here.
4740 */
4741 iter->temp = kmalloc(128, GFP_KERNEL);
4742 if (iter->temp)
4743 iter->temp_size = 128;
4744
4745 /*
4746 * trace_event_printf() may need to modify the given format
4747 * string to replace %p with %px so that it shows the real address
4748 * instead of a hash value. However, that is only needed for event
4749 * tracing; other tracers may not need it. Defer the allocation
4750 * until it is needed.
4751 */
4752 iter->fmt = NULL;
4753 iter->fmt_size = 0;
4754
4755 /*
4756 * We make a copy of the current tracer to avoid concurrent
4757 * changes on it while we are reading.
4758 */
4759 mutex_lock(&trace_types_lock);
4760 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4761 if (!iter->trace)
4762 goto fail;
4763
4764 *iter->trace = *tr->current_trace;
4765
4766 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4767 goto fail;
4768
4769 iter->tr = tr;
4770
4771 #ifdef CONFIG_TRACER_MAX_TRACE
4772 /* Currently only the top directory has a snapshot */
4773 if (tr->current_trace->print_max || snapshot)
4774 iter->array_buffer = &tr->max_buffer;
4775 else
4776 #endif
4777 iter->array_buffer = &tr->array_buffer;
4778 iter->snapshot = snapshot;
4779 iter->pos = -1;
4780 iter->cpu_file = tracing_get_cpu(inode);
4781 mutex_init(&iter->mutex);
4782
4783 /* Notify the tracer early; before we stop tracing. */
4784 if (iter->trace->open)
4785 iter->trace->open(iter);
4786
4787 /* Annotate start of buffers if we had overruns */
4788 if (ring_buffer_overruns(iter->array_buffer->buffer))
4789 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4790
4791 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4792 if (trace_clocks[tr->clock_id].in_ns)
4793 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4794
4795 /*
4796 * If pause-on-trace is enabled, then stop the trace while
4797 * dumping, unless this is the "snapshot" file
4798 */
4799 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4800 tracing_stop_tr(tr);
4801
4802 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4803 for_each_tracing_cpu(cpu) {
4804 iter->buffer_iter[cpu] =
4805 ring_buffer_read_prepare(iter->array_buffer->buffer,
4806 cpu, GFP_KERNEL);
4807 }
4808 ring_buffer_read_prepare_sync();
4809 for_each_tracing_cpu(cpu) {
4810 ring_buffer_read_start(iter->buffer_iter[cpu]);
4811 tracing_iter_reset(iter, cpu);
4812 }
4813 } else {
4814 cpu = iter->cpu_file;
4815 iter->buffer_iter[cpu] =
4816 ring_buffer_read_prepare(iter->array_buffer->buffer,
4817 cpu, GFP_KERNEL);
4818 ring_buffer_read_prepare_sync();
4819 ring_buffer_read_start(iter->buffer_iter[cpu]);
4820 tracing_iter_reset(iter, cpu);
4821 }
4822
4823 mutex_unlock(&trace_types_lock);
4824
4825 return iter;
4826
4827 fail:
4828 mutex_unlock(&trace_types_lock);
4829 kfree(iter->trace);
4830 kfree(iter->temp);
4831 kfree(iter->buffer_iter);
4832 release:
4833 seq_release_private(inode, file);
4834 return ERR_PTR(-ENOMEM);
4835 }
4836
4837 int tracing_open_generic(struct inode *inode, struct file *filp)
4838 {
4839 int ret;
4840
4841 ret = tracing_check_open_get_tr(NULL);
4842 if (ret)
4843 return ret;
4844
4845 filp->private_data = inode->i_private;
4846 return 0;
4847 }
4848
4849 bool tracing_is_disabled(void)
4850 {
4851 return tracing_disabled;
4852 }
4853
4854 /*
4855 * Open and update trace_array ref count.
4856 * Must have the current trace_array passed to it.
4857 */
4858 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4859 {
4860 struct trace_array *tr = inode->i_private;
4861 int ret;
4862
4863 ret = tracing_check_open_get_tr(tr);
4864 if (ret)
4865 return ret;
4866
4867 filp->private_data = inode->i_private;
4868
4869 return 0;
4870 }
4871
4872 static int tracing_release(struct inode *inode, struct file *file)
4873 {
4874 struct trace_array *tr = inode->i_private;
4875 struct seq_file *m = file->private_data;
4876 struct trace_iterator *iter;
4877 int cpu;
4878
4879 if (!(file->f_mode & FMODE_READ)) {
4880 trace_array_put(tr);
4881 return 0;
4882 }
4883
4884 /* Writes do not use seq_file */
4885 iter = m->private;
4886 mutex_lock(&trace_types_lock);
4887
4888 for_each_tracing_cpu(cpu) {
4889 if (iter->buffer_iter[cpu])
4890 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4891 }
4892
4893 if (iter->trace && iter->trace->close)
4894 iter->trace->close(iter);
4895
4896 if (!iter->snapshot && tr->stop_count)
4897 /* reenable tracing if it was previously enabled */
4898 tracing_start_tr(tr);
4899
4900 __trace_array_put(tr);
4901
4902 mutex_unlock(&trace_types_lock);
4903
4904 mutex_destroy(&iter->mutex);
4905 free_cpumask_var(iter->started);
4906 kfree(iter->fmt);
4907 kfree(iter->temp);
4908 kfree(iter->trace);
4909 kfree(iter->buffer_iter);
4910 seq_release_private(inode, file);
4911
4912 return 0;
4913 }
4914
4915 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4916 {
4917 struct trace_array *tr = inode->i_private;
4918
4919 trace_array_put(tr);
4920 return 0;
4921 }
4922
4923 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4924 {
4925 struct trace_array *tr = inode->i_private;
4926
4927 trace_array_put(tr);
4928
4929 return single_release(inode, file);
4930 }
4931
4932 static int tracing_open(struct inode *inode, struct file *file)
4933 {
4934 struct trace_array *tr = inode->i_private;
4935 struct trace_iterator *iter;
4936 int ret;
4937
4938 ret = tracing_check_open_get_tr(tr);
4939 if (ret)
4940 return ret;
4941
4942 /* If this file was open for write, then erase contents */
4943 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4944 int cpu = tracing_get_cpu(inode);
4945 struct array_buffer *trace_buf = &tr->array_buffer;
4946
4947 #ifdef CONFIG_TRACER_MAX_TRACE
4948 if (tr->current_trace->print_max)
4949 trace_buf = &tr->max_buffer;
4950 #endif
4951
4952 if (cpu == RING_BUFFER_ALL_CPUS)
4953 tracing_reset_online_cpus(trace_buf);
4954 else
4955 tracing_reset_cpu(trace_buf, cpu);
4956 }
4957
4958 if (file->f_mode & FMODE_READ) {
4959 iter = __tracing_open(inode, file, false);
4960 if (IS_ERR(iter))
4961 ret = PTR_ERR(iter);
4962 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4963 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4964 }
4965
4966 if (ret < 0)
4967 trace_array_put(tr);
4968
4969 return ret;
4970 }
4971
4972 /*
4973 * Some tracers are not suitable for instance buffers.
4974 * A tracer is always available for the global array (toplevel);
4975 * for an instance it must explicitly state that it is allowed.
4976 */
4977 static bool
4978 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4979 {
4980 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4981 }
4982
4983 /* Find the next tracer that this trace array may use */
4984 static struct tracer *
4985 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4986 {
4987 while (t && !trace_ok_for_array(t, tr))
4988 t = t->next;
4989
4990 return t;
4991 }
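/*
 * Illustration (an assumption for clarity, not taken from this file):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # cat /sys/kernel/tracing/instances/foo/available_tracers
 *
 * should list only tracers that set ->allow_instances, which is what the
 * two helpers above enforce for non-global trace arrays.
 */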
4992
4993 static void *
4994 t_next(struct seq_file *m, void *v, loff_t *pos)
4995 {
4996 struct trace_array *tr = m->private;
4997 struct tracer *t = v;
4998
4999 (*pos)++;
5000
5001 if (t)
5002 t = get_tracer_for_array(tr, t->next);
5003
5004 return t;
5005 }
5006
5007 static void *t_start(struct seq_file *m, loff_t *pos)
5008 {
5009 struct trace_array *tr = m->private;
5010 struct tracer *t;
5011 loff_t l = 0;
5012
5013 mutex_lock(&trace_types_lock);
5014
5015 t = get_tracer_for_array(tr, trace_types);
5016 for (; t && l < *pos; t = t_next(m, t, &l))
5017 ;
5018
5019 return t;
5020 }
5021
5022 static void t_stop(struct seq_file *m, void *p)
5023 {
5024 mutex_unlock(&trace_types_lock);
5025 }
5026
5027 static int t_show(struct seq_file *m, void *v)
5028 {
5029 struct tracer *t = v;
5030
5031 if (!t)
5032 return 0;
5033
5034 seq_puts(m, t->name);
5035 if (t->next)
5036 seq_putc(m, ' ');
5037 else
5038 seq_putc(m, '\n');
5039
5040 return 0;
5041 }
5042
5043 static const struct seq_operations show_traces_seq_ops = {
5044 .start = t_start,
5045 .next = t_next,
5046 .stop = t_stop,
5047 .show = t_show,
5048 };
5049
5050 static int show_traces_open(struct inode *inode, struct file *file)
5051 {
5052 struct trace_array *tr = inode->i_private;
5053 struct seq_file *m;
5054 int ret;
5055
5056 ret = tracing_check_open_get_tr(tr);
5057 if (ret)
5058 return ret;
5059
5060 ret = seq_open(file, &show_traces_seq_ops);
5061 if (ret) {
5062 trace_array_put(tr);
5063 return ret;
5064 }
5065
5066 m = file->private_data;
5067 m->private = tr;
5068
5069 return 0;
5070 }
5071
5072 static int show_traces_release(struct inode *inode, struct file *file)
5073 {
5074 struct trace_array *tr = inode->i_private;
5075
5076 trace_array_put(tr);
5077 return seq_release(inode, file);
5078 }
5079
5080 static ssize_t
5081 tracing_write_stub(struct file *filp, const char __user *ubuf,
5082 size_t count, loff_t *ppos)
5083 {
5084 return count;
5085 }
5086
5087 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5088 {
5089 int ret;
5090
5091 if (file->f_mode & FMODE_READ)
5092 ret = seq_lseek(file, offset, whence);
5093 else
5094 file->f_pos = ret = 0;
5095
5096 return ret;
5097 }
5098
5099 static const struct file_operations tracing_fops = {
5100 .open = tracing_open,
5101 .read = seq_read,
5102 .write = tracing_write_stub,
5103 .llseek = tracing_lseek,
5104 .release = tracing_release,
5105 };
5106
5107 static const struct file_operations show_traces_fops = {
5108 .open = show_traces_open,
5109 .read = seq_read,
5110 .llseek = seq_lseek,
5111 .release = show_traces_release,
5112 };
5113
5114 static ssize_t
5115 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5116 size_t count, loff_t *ppos)
5117 {
5118 struct trace_array *tr = file_inode(filp)->i_private;
5119 char *mask_str;
5120 int len;
5121
5122 len = snprintf(NULL, 0, "%*pb\n",
5123 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5124 mask_str = kmalloc(len, GFP_KERNEL);
5125 if (!mask_str)
5126 return -ENOMEM;
5127
5128 len = snprintf(mask_str, len, "%*pb\n",
5129 cpumask_pr_args(tr->tracing_cpumask));
5130 if (len >= count) {
5131 count = -EINVAL;
5132 goto out_err;
5133 }
5134 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5135
5136 out_err:
5137 kfree(mask_str);
5138
5139 return count;
5140 }
5141
5142 int tracing_set_cpumask(struct trace_array *tr,
5143 cpumask_var_t tracing_cpumask_new)
5144 {
5145 int cpu;
5146
5147 if (!tr)
5148 return -EINVAL;
5149
5150 local_irq_disable();
5151 arch_spin_lock(&tr->max_lock);
5152 for_each_tracing_cpu(cpu) {
5153 /*
5154 * Increase/decrease the disabled counter if we are
5155 * about to flip a bit in the cpumask:
5156 */
5157 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5158 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5159 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5160 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5161 }
5162 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5163 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5164 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5165 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5166 }
5167 }
5168 arch_spin_unlock(&tr->max_lock);
5169 local_irq_enable();
5170
5171 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5172
5173 return 0;
5174 }
5175
5176 static ssize_t
5177 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5178 size_t count, loff_t *ppos)
5179 {
5180 struct trace_array *tr = file_inode(filp)->i_private;
5181 cpumask_var_t tracing_cpumask_new;
5182 int err;
5183
5184 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5185 return -ENOMEM;
5186
5187 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5188 if (err)
5189 goto err_free;
5190
5191 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5192 if (err)
5193 goto err_free;
5194
5195 free_cpumask_var(tracing_cpumask_new);
5196
5197 return count;
5198
5199 err_free:
5200 free_cpumask_var(tracing_cpumask_new);
5201
5202 return err;
5203 }
5204
5205 static const struct file_operations tracing_cpumask_fops = {
5206 .open = tracing_open_generic_tr,
5207 .read = tracing_cpumask_read,
5208 .write = tracing_cpumask_write,
5209 .release = tracing_release_generic_tr,
5210 .llseek = generic_file_llseek,
5211 };
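/*
 * Usage sketch (hedged, not part of this file): tracing_cpumask takes a
 * hex cpumask, so for example
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1, and reading the file back prints the
 * mask via the "%*pb" format used above.
 */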
5212
5213 static int tracing_trace_options_show(struct seq_file *m, void *v)
5214 {
5215 struct tracer_opt *trace_opts;
5216 struct trace_array *tr = m->private;
5217 u32 tracer_flags;
5218 int i;
5219
5220 mutex_lock(&trace_types_lock);
5221 tracer_flags = tr->current_trace->flags->val;
5222 trace_opts = tr->current_trace->flags->opts;
5223
5224 for (i = 0; trace_options[i]; i++) {
5225 if (tr->trace_flags & (1 << i))
5226 seq_printf(m, "%s\n", trace_options[i]);
5227 else
5228 seq_printf(m, "no%s\n", trace_options[i]);
5229 }
5230
5231 for (i = 0; trace_opts[i].name; i++) {
5232 if (tracer_flags & trace_opts[i].bit)
5233 seq_printf(m, "%s\n", trace_opts[i].name);
5234 else
5235 seq_printf(m, "no%s\n", trace_opts[i].name);
5236 }
5237 mutex_unlock(&trace_types_lock);
5238
5239 return 0;
5240 }
5241
5242 static int __set_tracer_option(struct trace_array *tr,
5243 struct tracer_flags *tracer_flags,
5244 struct tracer_opt *opts, int neg)
5245 {
5246 struct tracer *trace = tracer_flags->trace;
5247 int ret;
5248
5249 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5250 if (ret)
5251 return ret;
5252
5253 if (neg)
5254 tracer_flags->val &= ~opts->bit;
5255 else
5256 tracer_flags->val |= opts->bit;
5257 return 0;
5258 }
5259
5260 /* Try to assign a tracer specific option */
5261 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5262 {
5263 struct tracer *trace = tr->current_trace;
5264 struct tracer_flags *tracer_flags = trace->flags;
5265 struct tracer_opt *opts = NULL;
5266 int i;
5267
5268 for (i = 0; tracer_flags->opts[i].name; i++) {
5269 opts = &tracer_flags->opts[i];
5270
5271 if (strcmp(cmp, opts->name) == 0)
5272 return __set_tracer_option(tr, trace->flags, opts, neg);
5273 }
5274
5275 return -EINVAL;
5276 }
5277
5278 /* Some tracers require overwrite to stay enabled */
5279 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5280 {
5281 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5282 return -1;
5283
5284 return 0;
5285 }
5286
5287 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5288 {
5289 int *map;
5290
5291 if ((mask == TRACE_ITER_RECORD_TGID) ||
5292 (mask == TRACE_ITER_RECORD_CMD))
5293 lockdep_assert_held(&event_mutex);
5294
5295 /* do nothing if flag is already set */
5296 if (!!(tr->trace_flags & mask) == !!enabled)
5297 return 0;
5298
5299 /* Give the tracer a chance to approve the change */
5300 if (tr->current_trace->flag_changed)
5301 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5302 return -EINVAL;
5303
5304 if (enabled)
5305 tr->trace_flags |= mask;
5306 else
5307 tr->trace_flags &= ~mask;
5308
5309 if (mask == TRACE_ITER_RECORD_CMD)
5310 trace_event_enable_cmd_record(enabled);
5311
5312 if (mask == TRACE_ITER_RECORD_TGID) {
5313 if (!tgid_map) {
5314 tgid_map_max = pid_max;
5315 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5316 GFP_KERNEL);
5317
5318 /*
5319 * Pairs with smp_load_acquire() in
5320 * trace_find_tgid_ptr() to ensure that if it observes
5321 * the tgid_map we just allocated then it also observes
5322 * the corresponding tgid_map_max value.
5323 */
5324 smp_store_release(&tgid_map, map);
5325 }
5326 if (!tgid_map) {
5327 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5328 return -ENOMEM;
5329 }
5330
5331 trace_event_enable_tgid_record(enabled);
5332 }
5333
5334 if (mask == TRACE_ITER_EVENT_FORK)
5335 trace_event_follow_fork(tr, enabled);
5336
5337 if (mask == TRACE_ITER_FUNC_FORK)
5338 ftrace_pid_follow_fork(tr, enabled);
5339
5340 if (mask == TRACE_ITER_OVERWRITE) {
5341 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5342 #ifdef CONFIG_TRACER_MAX_TRACE
5343 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5344 #endif
5345 }
5346
5347 if (mask == TRACE_ITER_PRINTK) {
5348 trace_printk_start_stop_comm(enabled);
5349 trace_printk_control(enabled);
5350 }
5351
5352 return 0;
5353 }
5354
5355 int trace_set_options(struct trace_array *tr, char *option)
5356 {
5357 char *cmp;
5358 int neg = 0;
5359 int ret;
5360 size_t orig_len = strlen(option);
5361 int len;
5362
5363 cmp = strstrip(option);
5364
5365 len = str_has_prefix(cmp, "no");
5366 if (len)
5367 neg = 1;
5368
5369 cmp += len;
5370
5371 mutex_lock(&event_mutex);
5372 mutex_lock(&trace_types_lock);
5373
5374 ret = match_string(trace_options, -1, cmp);
5375 /* If no option could be set, test the specific tracer options */
5376 if (ret < 0)
5377 ret = set_tracer_option(tr, cmp, neg);
5378 else
5379 ret = set_tracer_flag(tr, 1 << ret, !neg);
5380
5381 mutex_unlock(&trace_types_lock);
5382 mutex_unlock(&event_mutex);
5383
5384 /*
5385 * If the first trailing whitespace is replaced with '\0' by strstrip,
5386 * turn it back into a space.
5387 */
5388 if (orig_len > strlen(option))
5389 option[strlen(option)] = ' ';
5390
5391 return ret;
5392 }
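/*
 * Usage sketch (an illustration, not part of this file): the parsing
 * above is what makes writes to the trace_options file toggle flags,
 * e.g.
 *
 *   # echo sym-offset > /sys/kernel/tracing/trace_options
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 *
 * where a "no" prefix clears the named option and its absence sets it.
 */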
5393
5394 static void __init apply_trace_boot_options(void)
5395 {
5396 char *buf = trace_boot_options_buf;
5397 char *option;
5398
5399 while (true) {
5400 option = strsep(&buf, ",");
5401
5402 if (!option)
5403 break;
5404
5405 if (*option)
5406 trace_set_options(&global_trace, option);
5407
5408 /* Put back the comma to allow this to be called again */
5409 if (buf)
5410 *(buf - 1) = ',';
5411 }
5412 }
5413
5414 static ssize_t
5415 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5416 size_t cnt, loff_t *ppos)
5417 {
5418 struct seq_file *m = filp->private_data;
5419 struct trace_array *tr = m->private;
5420 char buf[64];
5421 int ret;
5422
5423 if (cnt >= sizeof(buf))
5424 return -EINVAL;
5425
5426 if (copy_from_user(buf, ubuf, cnt))
5427 return -EFAULT;
5428
5429 buf[cnt] = 0;
5430
5431 ret = trace_set_options(tr, buf);
5432 if (ret < 0)
5433 return ret;
5434
5435 *ppos += cnt;
5436
5437 return cnt;
5438 }
5439
5440 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5441 {
5442 struct trace_array *tr = inode->i_private;
5443 int ret;
5444
5445 ret = tracing_check_open_get_tr(tr);
5446 if (ret)
5447 return ret;
5448
5449 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5450 if (ret < 0)
5451 trace_array_put(tr);
5452
5453 return ret;
5454 }
5455
5456 static const struct file_operations tracing_iter_fops = {
5457 .open = tracing_trace_options_open,
5458 .read = seq_read,
5459 .llseek = seq_lseek,
5460 .release = tracing_single_release_tr,
5461 .write = tracing_trace_options_write,
5462 };
5463
5464 static const char readme_msg[] =
5465 "tracing mini-HOWTO:\n\n"
5466 "# echo 0 > tracing_on : quick way to disable tracing\n"
5467 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5468 " Important files:\n"
5469 " trace\t\t\t- The static contents of the buffer\n"
5470 "\t\t\t To clear the buffer, write into this file: echo > trace\n"
5471 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5472 " current_tracer\t- function and latency tracers\n"
5473 " available_tracers\t- list of configured tracers for current_tracer\n"
5474 " error_log\t- error log for failed commands (that support it)\n"
5475 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5476 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5477 " trace_clock\t\t- change the clock used to order events\n"
5478 " local: Per cpu clock but may not be synced across CPUs\n"
5479 " global: Synced across CPUs but slows tracing down.\n"
5480 " counter: Not a clock, but just an increment\n"
5481 " uptime: Jiffy counter from time of boot\n"
5482 " perf: Same clock that perf events use\n"
5483 #ifdef CONFIG_X86_64
5484 " x86-tsc: TSC cycle counter\n"
5485 #endif
5486 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5487 " delta: Delta difference against a buffer-wide timestamp\n"
5488 " absolute: Absolute (standalone) timestamp\n"
5489 "\n trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5490 "\n trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5491 " tracing_cpumask\t- Limit which CPUs to trace\n"
5492 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5493 "\t\t\t Remove sub-buffer with rmdir\n"
5494 " trace_options\t\t- Set format or modify how tracing happens\n"
5495 "\t\t\t Disable an option by prefixing 'no' to the\n"
5496 "\t\t\t option name\n"
5497 " saved_cmdlines_size\t- echo a number in here to set the size of the stored comm-pid list\n"
5498 #ifdef CONFIG_DYNAMIC_FTRACE
5499 "\n available_filter_functions - list of functions that can be filtered on\n"
5500 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5501 "\t\t\t functions\n"
5502 "\t accepts: func_full_name or glob-matching-pattern\n"
5503 "\t modules: Can select a group via module\n"
5504 "\t Format: :mod:<module-name>\n"
5505 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5506 "\t triggers: a command to perform when function is hit\n"
5507 "\t Format: <function>:<trigger>[:count]\n"
5508 "\t trigger: traceon, traceoff\n"
5509 "\t\t enable_event:<system>:<event>\n"
5510 "\t\t disable_event:<system>:<event>\n"
5511 #ifdef CONFIG_STACKTRACE
5512 "\t\t stacktrace\n"
5513 #endif
5514 #ifdef CONFIG_TRACER_SNAPSHOT
5515 "\t\t snapshot\n"
5516 #endif
5517 "\t\t dump\n"
5518 "\t\t cpudump\n"
5519 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5520 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5521 "\t The first one will disable tracing every time do_fault is hit\n"
5522 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5523 "\t The first time do_trap is hit and it disables tracing, the\n"
5524 "\t counter will decrement to 2. If tracing is already disabled,\n"
5525 "\t the counter will not decrement. It only decrements when the\n"
5526 "\t trigger did work\n"
5527 "\t To remove trigger without count:\n"
5528 "\t echo '!<function>:<trigger>' > set_ftrace_filter\n"
5529 "\t To remove trigger with a count:\n"
5530 "\t echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5531 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5532 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5533 "\t modules: Can select a group via module command :mod:\n"
5534 "\t Does not accept triggers\n"
5535 #endif /* CONFIG_DYNAMIC_FTRACE */
5536 #ifdef CONFIG_FUNCTION_TRACER
5537 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5538 "\t\t (function)\n"
5539 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5540 "\t\t (function)\n"
5541 #endif
5542 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5543 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5544 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5545 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5546 #endif
5547 #ifdef CONFIG_TRACER_SNAPSHOT
5548 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5549 "\t\t\t snapshot buffer. Read the contents for more\n"
5550 "\t\t\t information\n"
5551 #endif
5552 #ifdef CONFIG_STACK_TRACER
5553 " stack_trace\t\t- Shows the max stack trace when active\n"
5554 " stack_max_size\t- Shows current max stack size that was traced\n"
5555 "\t\t\t Write into this file to reset the max size (trigger a\n"
5556 "\t\t\t new trace)\n"
5557 #ifdef CONFIG_DYNAMIC_FTRACE
5558 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5559 "\t\t\t traces\n"
5560 #endif
5561 #endif /* CONFIG_STACK_TRACER */
5562 #ifdef CONFIG_DYNAMIC_EVENTS
5563 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5564 "\t\t\t Write into this file to define/undefine new trace events.\n"
5565 #endif
5566 #ifdef CONFIG_KPROBE_EVENTS
5567 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5568 "\t\t\t Write into this file to define/undefine new trace events.\n"
5569 #endif
5570 #ifdef CONFIG_UPROBE_EVENTS
5571 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5572 "\t\t\t Write into this file to define/undefine new trace events.\n"
5573 #endif
5574 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5575 "\t accepts: event-definitions (one definition per line)\n"
5576 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5577 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 "\t s:[synthetic/]<event> <field> [<field>]\n"
5580 #endif
5581 "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5582 "\t -:[<group>/]<event>\n"
5583 #ifdef CONFIG_KPROBE_EVENTS
5584 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5585 "\t place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5586 #endif
5587 #ifdef CONFIG_UPROBE_EVENTS
5588 "\t place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5589 #endif
5590 "\t args: <name>=fetcharg[:type]\n"
5591 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5592 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5593 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5594 #else
5595 "\t $stack<index>, $stack, $retval, $comm,\n"
5596 #endif
5597 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5598 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5599 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5600 "\t <type>\\[<array-size>\\]\n"
5601 #ifdef CONFIG_HIST_TRIGGERS
5602 "\t field: <stype> <name>;\n"
5603 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5604 "\t [unsigned] char/int/long\n"
5605 #endif
5606 "\t efield: For event probes ('e' types), the field is one of the fields\n"
5607 "\t of the <attached-group>/<attached-event>.\n"
5608 #endif
5609 " events/\t\t- Directory containing all trace event subsystems:\n"
5610 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5611 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5612 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5613 "\t\t\t events\n"
5614 " filter\t\t- If set, only events passing filter are traced\n"
5615 " events/<system>/<event>/\t- Directory containing control files for\n"
5616 "\t\t\t <event>:\n"
5617 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5618 " filter\t\t- If set, only events passing filter are traced\n"
5619 " trigger\t\t- If set, a command to perform when event is hit\n"
5620 "\t Format: <trigger>[:count][if <filter>]\n"
5621 "\t trigger: traceon, traceoff\n"
5622 "\t enable_event:<system>:<event>\n"
5623 "\t disable_event:<system>:<event>\n"
5624 #ifdef CONFIG_HIST_TRIGGERS
5625 "\t enable_hist:<system>:<event>\n"
5626 "\t disable_hist:<system>:<event>\n"
5627 #endif
5628 #ifdef CONFIG_STACKTRACE
5629 "\t\t stacktrace\n"
5630 #endif
5631 #ifdef CONFIG_TRACER_SNAPSHOT
5632 "\t\t snapshot\n"
5633 #endif
5634 #ifdef CONFIG_HIST_TRIGGERS
5635 "\t\t hist (see below)\n"
5636 #endif
5637 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5638 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5639 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5640 "\t events/block/block_unplug/trigger\n"
5641 "\t The first disables tracing every time block_unplug is hit.\n"
5642 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5643 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5644 "\t is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5645 "\t Like function triggers, the counter is only decremented if it\n"
5646 "\t enabled or disabled tracing.\n"
5647 "\t To remove a trigger without a count:\n"
5648 "\t echo '!<trigger>' > <system>/<event>/trigger\n"
5649 "\t To remove a trigger with a count:\n"
5650 "\t echo '!<trigger>:0' > <system>/<event>/trigger\n"
5651 "\t Filters can be ignored when removing a trigger.\n"
5652 #ifdef CONFIG_HIST_TRIGGERS
5653 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5654 "\t Format: hist:keys=<field1[,field2,...]>\n"
5655 "\t [:values=<field1[,field2,...]>]\n"
5656 "\t [:sort=<field1[,field2,...]>]\n"
5657 "\t [:size=#entries]\n"
5658 "\t [:pause][:continue][:clear]\n"
5659 "\t [:name=histname1]\n"
5660 "\t [:<handler>.<action>]\n"
5661 "\t [if <filter>]\n\n"
5662 "\t Note, special fields can be used as well:\n"
5663 "\t common_timestamp - to record current timestamp\n"
5664 "\t common_cpu - to record the CPU the event happened on\n"
5665 "\n"
5666 "\t When a matching event is hit, an entry is added to a hash\n"
5667 "\t table using the key(s) and value(s) named, and the value of a\n"
5668 "\t sum called 'hitcount' is incremented. Keys and values\n"
5669 "\t correspond to fields in the event's format description. Keys\n"
5670 "\t can be any field, or the special string 'stacktrace'.\n"
5671 "\t Compound keys consisting of up to two fields can be specified\n"
5672 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5673 "\t fields. Sort keys consisting of up to two fields can be\n"
5674 "\t specified using the 'sort' keyword. The sort direction can\n"
5675 "\t be modified by appending '.descending' or '.ascending' to a\n"
5676 "\t sort field. The 'size' parameter can be used to specify more\n"
5677 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5678 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5679 "\t its histogram data will be shared with other triggers of the\n"
5680 "\t same name, and trigger hits will update this common data.\n\n"
5681 "\t Reading the 'hist' file for the event will dump the hash\n"
5682 "\t table in its entirety to stdout. If there are multiple hist\n"
5683 "\t triggers attached to an event, there will be a table for each\n"
5684 "\t trigger in the output. The table displayed for a named\n"
5685 "\t trigger will be the same as any other instance having the\n"
5686 "\t same name. The default format used to display a given field\n"
5687 "\t can be modified by appending any of the following modifiers\n"
5688 "\t to the field name, as applicable:\n\n"
5689 "\t .hex display a number as a hex value\n"
5690 "\t .sym display an address as a symbol\n"
5691 "\t .sym-offset display an address as a symbol and offset\n"
5692 "\t .execname display a common_pid as a program name\n"
5693 "\t .syscall display a syscall id as a syscall name\n"
5694 "\t .log2 display log2 value rather than raw number\n"
5695 "\t .buckets=size display values in groups of size rather than raw number\n"
5696 "\t .usecs display a common_timestamp in microseconds\n\n"
5697 "\t The 'pause' parameter can be used to pause an existing hist\n"
5698 "\t trigger or to start a hist trigger but not log any events\n"
5699 "\t until told to do so. 'continue' can be used to start or\n"
5700 "\t restart a paused hist trigger.\n\n"
5701 "\t The 'clear' parameter will clear the contents of a running\n"
5702 "\t hist trigger and leave its current paused/active state\n"
5703 "\t unchanged.\n\n"
5704 "\t The enable_hist and disable_hist triggers can be used to\n"
5705 "\t have one event conditionally start and stop another event's\n"
5706 "\t already-attached hist trigger. The syntax is analogous to\n"
5707 "\t the enable_event and disable_event triggers.\n\n"
5708 "\t Hist trigger handlers and actions are executed whenever a\n"
5709 "\t histogram entry is added or updated. They take the form:\n\n"
5710 "\t <handler>.<action>\n\n"
5711 "\t The available handlers are:\n\n"
5712 "\t onmatch(matching.event) - invoke on addition or update\n"
5713 "\t onmax(var) - invoke if var exceeds current max\n"
5714 "\t onchange(var) - invoke action if var changes\n\n"
5715 "\t The available actions are:\n\n"
5716 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5717 "\t save(field,...) - save current event fields\n"
5718 #ifdef CONFIG_TRACER_SNAPSHOT
5719 "\t snapshot() - snapshot the trace buffer\n\n"
5720 #endif
5721 #ifdef CONFIG_SYNTH_EVENTS
5722 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5723 "\t Write into this file to define/undefine new synthetic events.\n"
5724 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5725 #endif
5726 #endif
5727 ;
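/*
 * Worked example (hedged; the event and field names are assumptions) of
 * the hist trigger syntax documented in readme_msg above:
 *
 *   # echo 'hist:keys=common_pid.execname:vals=bytes_req:sort=hitcount' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 *
 * aggregates kmalloc requests per task and dumps the resulting table.
 */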
5728
5729 static ssize_t
5730 tracing_readme_read(struct file *filp, char __user *ubuf,
5731 size_t cnt, loff_t *ppos)
5732 {
5733 return simple_read_from_buffer(ubuf, cnt, ppos,
5734 readme_msg, strlen(readme_msg));
5735 }
5736
5737 static const struct file_operations tracing_readme_fops = {
5738 .open = tracing_open_generic,
5739 .read = tracing_readme_read,
5740 .llseek = generic_file_llseek,
5741 };
5742
5743 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5744 {
5745 int pid = ++(*pos);
5746
5747 return trace_find_tgid_ptr(pid);
5748 }
5749
5750 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5751 {
5752 int pid = *pos;
5753
5754 return trace_find_tgid_ptr(pid);
5755 }
5756
5757 static void saved_tgids_stop(struct seq_file *m, void *v)
5758 {
5759 }
5760
5761 static int saved_tgids_show(struct seq_file *m, void *v)
5762 {
5763 int *entry = (int *)v;
5764 int pid = entry - tgid_map;
5765 int tgid = *entry;
5766
5767 if (tgid == 0)
5768 return SEQ_SKIP;
5769
5770 seq_printf(m, "%d %d\n", pid, tgid);
5771 return 0;
5772 }
5773
5774 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5775 .start = saved_tgids_start,
5776 .stop = saved_tgids_stop,
5777 .next = saved_tgids_next,
5778 .show = saved_tgids_show,
5779 };
5780
5781 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5782 {
5783 int ret;
5784
5785 ret = tracing_check_open_get_tr(NULL);
5786 if (ret)
5787 return ret;
5788
5789 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5790 }
5791
5792
5793 static const struct file_operations tracing_saved_tgids_fops = {
5794 .open = tracing_saved_tgids_open,
5795 .read = seq_read,
5796 .llseek = seq_lseek,
5797 .release = seq_release,
5798 };
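/*
 * Illustrative usage (not part of the kernel source): these seq_file
 * operations typically back the tracefs "saved_tgids" file, which lists
 * the cached PID to TGID mappings one "<pid> <tgid>" pair per line, as
 * printed by saved_tgids_show() above. Assuming tracefs is mounted at
 * /sys/kernel/tracing and the record-tgid option is enabled:
 *
 *   echo 1 > /sys/kernel/tracing/options/record-tgid
 *   cat /sys/kernel/tracing/saved_tgids
 */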
5799
5800 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5801 {
5802 unsigned int *ptr = v;
5803
5804 if (*pos || m->count)
5805 ptr++;
5806
5807 (*pos)++;
5808
5809 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5810 ptr++) {
5811 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5812 continue;
5813
5814 return ptr;
5815 }
5816
5817 return NULL;
5818 }
5819
5820 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5821 {
5822 void *v;
5823 loff_t l = 0;
5824
5825 preempt_disable();
5826 arch_spin_lock(&trace_cmdline_lock);
5827
5828 v = &savedcmd->map_cmdline_to_pid[0];
5829 while (l <= *pos) {
5830 v = saved_cmdlines_next(m, v, &l);
5831 if (!v)
5832 return NULL;
5833 }
5834
5835 return v;
5836 }
5837
5838 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5839 {
5840 arch_spin_unlock(&trace_cmdline_lock);
5841 preempt_enable();
5842 }
5843
5844 static int saved_cmdlines_show(struct seq_file *m, void *v)
5845 {
5846 char buf[TASK_COMM_LEN];
5847 unsigned int *pid = v;
5848
5849 __trace_find_cmdline(*pid, buf);
5850 seq_printf(m, "%d %s\n", *pid, buf);
5851 return 0;
5852 }
5853
5854 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5855 .start = saved_cmdlines_start,
5856 .next = saved_cmdlines_next,
5857 .stop = saved_cmdlines_stop,
5858 .show = saved_cmdlines_show,
5859 };
5860
5861 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5862 {
5863 int ret;
5864
5865 ret = tracing_check_open_get_tr(NULL);
5866 if (ret)
5867 return ret;
5868
5869 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5870 }
5871
5872 static const struct file_operations tracing_saved_cmdlines_fops = {
5873 .open = tracing_saved_cmdlines_open,
5874 .read = seq_read,
5875 .llseek = seq_lseek,
5876 .release = seq_release,
5877 };
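/*
 * Illustrative usage (not part of the kernel source): the seq_file above
 * typically backs the tracefs "saved_cmdlines" file, which dumps the
 * cached "<pid> <comm>" pairs produced by saved_cmdlines_show():
 *
 *   cat /sys/kernel/tracing/saved_cmdlines
 */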
5878
5879 static ssize_t
5880 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5881 size_t cnt, loff_t *ppos)
5882 {
5883 char buf[64];
5884 int r;
5885
5886 arch_spin_lock(&trace_cmdline_lock);
5887 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5888 arch_spin_unlock(&trace_cmdline_lock);
5889
5890 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5891 }
5892
5893 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5894 {
5895 kfree(s->saved_cmdlines);
5896 kfree(s->map_cmdline_to_pid);
5897 kfree(s);
5898 }
5899
5900 static int tracing_resize_saved_cmdlines(unsigned int val)
5901 {
5902 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5903
5904 s = kmalloc(sizeof(*s), GFP_KERNEL);
5905 if (!s)
5906 return -ENOMEM;
5907
5908 if (allocate_cmdlines_buffer(val, s) < 0) {
5909 kfree(s);
5910 return -ENOMEM;
5911 }
5912
5913 arch_spin_lock(&trace_cmdline_lock);
5914 savedcmd_temp = savedcmd;
5915 savedcmd = s;
5916 arch_spin_unlock(&trace_cmdline_lock);
5917 free_saved_cmdlines_buffer(savedcmd_temp);
5918
5919 return 0;
5920 }
5921
5922 static ssize_t
5923 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5924 size_t cnt, loff_t *ppos)
5925 {
5926 unsigned long val;
5927 int ret;
5928
5929 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5930 if (ret)
5931 return ret;
5932
5933 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5934 if (!val || val > PID_MAX_DEFAULT)
5935 return -EINVAL;
5936
5937 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5938 if (ret < 0)
5939 return ret;
5940
5941 *ppos += cnt;
5942
5943 return cnt;
5944 }
5945
5946 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5947 .open = tracing_open_generic,
5948 .read = tracing_saved_cmdlines_size_read,
5949 .write = tracing_saved_cmdlines_size_write,
5950 };
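/*
 * Illustrative usage (not part of the kernel source): these fops back a
 * "saved_cmdlines_size" control file. Per the write handler above, the
 * value must be between 1 and PID_MAX_DEFAULT. Assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */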
5951
5952 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5953 static union trace_eval_map_item *
5954 update_eval_map(union trace_eval_map_item *ptr)
5955 {
5956 if (!ptr->map.eval_string) {
5957 if (ptr->tail.next) {
5958 ptr = ptr->tail.next;
5959 /* Set ptr to the next real item (skip head) */
5960 ptr++;
5961 } else
5962 return NULL;
5963 }
5964 return ptr;
5965 }
5966
5967 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5968 {
5969 union trace_eval_map_item *ptr = v;
5970
5971 /*
5972 * Paranoid! If ptr points to end, we don't want to increment past it.
5973 * This really should never happen.
5974 */
5975 (*pos)++;
5976 ptr = update_eval_map(ptr);
5977 if (WARN_ON_ONCE(!ptr))
5978 return NULL;
5979
5980 ptr++;
5981 ptr = update_eval_map(ptr);
5982
5983 return ptr;
5984 }
5985
5986 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5987 {
5988 union trace_eval_map_item *v;
5989 loff_t l = 0;
5990
5991 mutex_lock(&trace_eval_mutex);
5992
5993 v = trace_eval_maps;
5994 if (v)
5995 v++;
5996
5997 while (v && l < *pos) {
5998 v = eval_map_next(m, v, &l);
5999 }
6000
6001 return v;
6002 }
6003
6004 static void eval_map_stop(struct seq_file *m, void *v)
6005 {
6006 mutex_unlock(&trace_eval_mutex);
6007 }
6008
6009 static int eval_map_show(struct seq_file *m, void *v)
6010 {
6011 union trace_eval_map_item *ptr = v;
6012
6013 seq_printf(m, "%s %ld (%s)\n",
6014 ptr->map.eval_string, ptr->map.eval_value,
6015 ptr->map.system);
6016
6017 return 0;
6018 }
6019
6020 static const struct seq_operations tracing_eval_map_seq_ops = {
6021 .start = eval_map_start,
6022 .next = eval_map_next,
6023 .stop = eval_map_stop,
6024 .show = eval_map_show,
6025 };
6026
6027 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6028 {
6029 int ret;
6030
6031 ret = tracing_check_open_get_tr(NULL);
6032 if (ret)
6033 return ret;
6034
6035 return seq_open(filp, &tracing_eval_map_seq_ops);
6036 }
6037
6038 static const struct file_operations tracing_eval_map_fops = {
6039 .open = tracing_eval_map_open,
6040 .read = seq_read,
6041 .llseek = seq_lseek,
6042 .release = seq_release,
6043 };
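/*
 * Illustrative usage (not part of the kernel source): these fops back the
 * "eval_map" file created by trace_create_eval_file() below. Each line is
 * printed by eval_map_show() as "<name> <value> (<system>)":
 *
 *   cat /sys/kernel/tracing/eval_map
 */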
6044
6045 static inline union trace_eval_map_item *
6046 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6047 {
6048 /* Return tail of array given the head */
6049 return ptr + ptr->head.length + 1;
6050 }
6051
6052 static void
6053 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6054 int len)
6055 {
6056 struct trace_eval_map **stop;
6057 struct trace_eval_map **map;
6058 union trace_eval_map_item *map_array;
6059 union trace_eval_map_item *ptr;
6060
6061 stop = start + len;
6062
6063 /*
6064 * The trace_eval_maps contains the map plus a head and tail item,
6065 * where the head holds the module and length of array, and the
6066 * tail holds a pointer to the next list.
6067 */
6068 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6069 if (!map_array) {
6070 pr_warn("Unable to allocate trace eval mapping\n");
6071 return;
6072 }
6073
6074 mutex_lock(&trace_eval_mutex);
6075
6076 if (!trace_eval_maps)
6077 trace_eval_maps = map_array;
6078 else {
6079 ptr = trace_eval_maps;
6080 for (;;) {
6081 ptr = trace_eval_jmp_to_tail(ptr);
6082 if (!ptr->tail.next)
6083 break;
6084 ptr = ptr->tail.next;
6085
6086 }
6087 ptr->tail.next = map_array;
6088 }
6089 map_array->head.mod = mod;
6090 map_array->head.length = len;
6091 map_array++;
6092
6093 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6094 map_array->map = **map;
6095 map_array++;
6096 }
6097 memset(map_array, 0, sizeof(*map_array));
6098
6099 mutex_unlock(&trace_eval_mutex);
6100 }
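/*
 * Sketch of the array built by trace_insert_eval_map_file() above (for
 * illustration only). For a module exporting N eval maps, N + 2 items
 * are allocated: a head, the N maps, and a zeroed tail that chains to
 * the next module's array:
 *
 *   map_array[0]       head  { .mod = mod, .length = N }
 *   map_array[1..N]    map   copies of *start[0] .. *start[N-1]
 *   map_array[N+1]     tail  { .next = NULL }  (memset to 0)
 *
 * trace_eval_jmp_to_tail() uses head.length to hop from one head to the
 * following tail when walking the chain.
 */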
6101
6102 static void trace_create_eval_file(struct dentry *d_tracer)
6103 {
6104 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6105 NULL, &tracing_eval_map_fops);
6106 }
6107
6108 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6109 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6110 static inline void trace_insert_eval_map_file(struct module *mod,
6111 struct trace_eval_map **start, int len) { }
6112 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6113
6114 static void trace_insert_eval_map(struct module *mod,
6115 struct trace_eval_map **start, int len)
6116 {
6117 struct trace_eval_map **map;
6118
6119 if (len <= 0)
6120 return;
6121
6122 map = start;
6123
6124 trace_event_eval_update(map, len);
6125
6126 trace_insert_eval_map_file(mod, start, len);
6127 }
6128
6129 static ssize_t
6130 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6131 size_t cnt, loff_t *ppos)
6132 {
6133 struct trace_array *tr = filp->private_data;
6134 char buf[MAX_TRACER_SIZE+2];
6135 int r;
6136
6137 mutex_lock(&trace_types_lock);
6138 r = sprintf(buf, "%s\n", tr->current_trace->name);
6139 mutex_unlock(&trace_types_lock);
6140
6141 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6142 }
6143
6144 int tracer_init(struct tracer *t, struct trace_array *tr)
6145 {
6146 tracing_reset_online_cpus(&tr->array_buffer);
6147 return t->init(tr);
6148 }
6149
6150 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6151 {
6152 int cpu;
6153
6154 for_each_tracing_cpu(cpu)
6155 per_cpu_ptr(buf->data, cpu)->entries = val;
6156 }
6157
6158 #ifdef CONFIG_TRACER_MAX_TRACE
6159 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6160 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6161 struct array_buffer *size_buf, int cpu_id)
6162 {
6163 int cpu, ret = 0;
6164
6165 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6166 for_each_tracing_cpu(cpu) {
6167 ret = ring_buffer_resize(trace_buf->buffer,
6168 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6169 if (ret < 0)
6170 break;
6171 per_cpu_ptr(trace_buf->data, cpu)->entries =
6172 per_cpu_ptr(size_buf->data, cpu)->entries;
6173 }
6174 } else {
6175 ret = ring_buffer_resize(trace_buf->buffer,
6176 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6177 if (ret == 0)
6178 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6179 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6180 }
6181
6182 return ret;
6183 }
6184 #endif /* CONFIG_TRACER_MAX_TRACE */
6185
6186 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6187 unsigned long size, int cpu)
6188 {
6189 int ret;
6190
6191 /*
6192 * If kernel or user changes the size of the ring buffer
6193 * we use the size that was given, and we can forget about
6194 * expanding it later.
6195 */
6196 ring_buffer_expanded = true;
6197
6198 /* May be called before buffers are initialized */
6199 if (!tr->array_buffer.buffer)
6200 return 0;
6201
6202 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6203 if (ret < 0)
6204 return ret;
6205
6206 #ifdef CONFIG_TRACER_MAX_TRACE
6207 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6208 !tr->current_trace->use_max_tr)
6209 goto out;
6210
6211 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6212 if (ret < 0) {
6213 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6214 &tr->array_buffer, cpu);
6215 if (r < 0) {
6216 /*
6217 * AARGH! We are left with different
6218 * size max buffer!!!!
6219 * The max buffer is our "snapshot" buffer.
6220 * When a tracer needs a snapshot (one of the
6221 * latency tracers), it swaps the max buffer
6222 * with the saved snapshot. We succeeded to
6223 * update the size of the main buffer, but failed to
6224 * update the size of the max buffer. But when we tried
6225 * to reset the main buffer to the original size, we
6226 * failed there too. This is very unlikely to
6227 * happen, but if it does, warn and kill all
6228 * tracing.
6229 */
6230 WARN_ON(1);
6231 tracing_disabled = 1;
6232 }
6233 return ret;
6234 }
6235
6236 if (cpu == RING_BUFFER_ALL_CPUS)
6237 set_buffer_entries(&tr->max_buffer, size);
6238 else
6239 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6240
6241 out:
6242 #endif /* CONFIG_TRACER_MAX_TRACE */
6243
6244 if (cpu == RING_BUFFER_ALL_CPUS)
6245 set_buffer_entries(&tr->array_buffer, size);
6246 else
6247 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6248
6249 return ret;
6250 }
6251
6252 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6253 unsigned long size, int cpu_id)
6254 {
6255 int ret;
6256
6257 mutex_lock(&trace_types_lock);
6258
6259 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6260 /* make sure this cpu is enabled in the mask */
6261 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6262 ret = -EINVAL;
6263 goto out;
6264 }
6265 }
6266
6267 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6268 if (ret < 0)
6269 ret = -ENOMEM;
6270
6271 out:
6272 mutex_unlock(&trace_types_lock);
6273
6274 return ret;
6275 }
6276
6277
6278 /**
6279 * tracing_update_buffers - used by tracing facility to expand ring buffers
6280 *
6281 * To save memory when tracing is never used on a system that has it
6282 * configured in, the ring buffers are set to a minimum size. But once
6283 * a user starts to use the tracing facility, they need to grow
6284 * to their default size.
6285 *
6286 * This function is to be called when a tracer is about to be used.
6287 */
6288 int tracing_update_buffers(void)
6289 {
6290 int ret = 0;
6291
6292 mutex_lock(&trace_types_lock);
6293 if (!ring_buffer_expanded)
6294 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6295 RING_BUFFER_ALL_CPUS);
6296 mutex_unlock(&trace_types_lock);
6297
6298 return ret;
6299 }
6300
6301 struct trace_option_dentry;
6302
6303 static void
6304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6305
6306 /*
6307 * Used to clear out the tracer before deletion of an instance.
6308 * Must have trace_types_lock held.
6309 */
6310 static void tracing_set_nop(struct trace_array *tr)
6311 {
6312 if (tr->current_trace == &nop_trace)
6313 return;
6314
6315 tr->current_trace->enabled--;
6316
6317 if (tr->current_trace->reset)
6318 tr->current_trace->reset(tr);
6319
6320 tr->current_trace = &nop_trace;
6321 }
6322
6323 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6324 {
6325 /* Only enable if the directory has been created already. */
6326 if (!tr->dir)
6327 return;
6328
6329 create_trace_option_files(tr, t);
6330 }
6331
6332 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6333 {
6334 struct tracer *t;
6335 #ifdef CONFIG_TRACER_MAX_TRACE
6336 bool had_max_tr;
6337 #endif
6338 int ret = 0;
6339
6340 mutex_lock(&trace_types_lock);
6341
6342 if (!ring_buffer_expanded) {
6343 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6344 RING_BUFFER_ALL_CPUS);
6345 if (ret < 0)
6346 goto out;
6347 ret = 0;
6348 }
6349
6350 for (t = trace_types; t; t = t->next) {
6351 if (strcmp(t->name, buf) == 0)
6352 break;
6353 }
6354 if (!t) {
6355 ret = -EINVAL;
6356 goto out;
6357 }
6358 if (t == tr->current_trace)
6359 goto out;
6360
6361 #ifdef CONFIG_TRACER_SNAPSHOT
6362 if (t->use_max_tr) {
6363 arch_spin_lock(&tr->max_lock);
6364 if (tr->cond_snapshot)
6365 ret = -EBUSY;
6366 arch_spin_unlock(&tr->max_lock);
6367 if (ret)
6368 goto out;
6369 }
6370 #endif
6371 /* Some tracers won't work on kernel command line */
6372 if (system_state < SYSTEM_RUNNING && t->noboot) {
6373 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6374 t->name);
6375 goto out;
6376 }
6377
6378 /* Some tracers are only allowed for the top level buffer */
6379 if (!trace_ok_for_array(t, tr)) {
6380 ret = -EINVAL;
6381 goto out;
6382 }
6383
6384 /* If trace pipe files are being read, we can't change the tracer */
6385 if (tr->trace_ref) {
6386 ret = -EBUSY;
6387 goto out;
6388 }
6389
6390 trace_branch_disable();
6391
6392 tr->current_trace->enabled--;
6393
6394 if (tr->current_trace->reset)
6395 tr->current_trace->reset(tr);
6396
6397 /* Current trace needs to be nop_trace before synchronize_rcu */
6398 tr->current_trace = &nop_trace;
6399
6400 #ifdef CONFIG_TRACER_MAX_TRACE
6401 had_max_tr = tr->allocated_snapshot;
6402
6403 if (had_max_tr && !t->use_max_tr) {
6404 /*
6405 * We need to make sure that the update_max_tr sees that
6406 * current_trace changed to nop_trace to keep it from
6407 * swapping the buffers after we resize it.
6408 * The update_max_tr is called with interrupts disabled,
6409 * so a synchronize_rcu() is sufficient.
6410 */
6411 synchronize_rcu();
6412 free_snapshot(tr);
6413 }
6414 #endif
6415
6416 #ifdef CONFIG_TRACER_MAX_TRACE
6417 if (t->use_max_tr && !had_max_tr) {
6418 ret = tracing_alloc_snapshot_instance(tr);
6419 if (ret < 0)
6420 goto out;
6421 }
6422 #endif
6423
6424 if (t->init) {
6425 ret = tracer_init(t, tr);
6426 if (ret)
6427 goto out;
6428 }
6429
6430 tr->current_trace = t;
6431 tr->current_trace->enabled++;
6432 trace_branch_enable(tr);
6433 out:
6434 mutex_unlock(&trace_types_lock);
6435
6436 return ret;
6437 }
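/*
 * Illustrative usage (not part of the kernel source): tracing_set_tracer()
 * is reached by writing a registered tracer name to the "current_tracer"
 * file; "nop" selects the nop_trace used above to clear the tracer.
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 */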
6438
6439 static ssize_t
6440 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6441 size_t cnt, loff_t *ppos)
6442 {
6443 struct trace_array *tr = filp->private_data;
6444 char buf[MAX_TRACER_SIZE+1];
6445 int i;
6446 size_t ret;
6447 int err;
6448
6449 ret = cnt;
6450
6451 if (cnt > MAX_TRACER_SIZE)
6452 cnt = MAX_TRACER_SIZE;
6453
6454 if (copy_from_user(buf, ubuf, cnt))
6455 return -EFAULT;
6456
6457 buf[cnt] = 0;
6458
6459 /* strip ending whitespace. */
6460 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6461 buf[i] = 0;
6462
6463 err = tracing_set_tracer(tr, buf);
6464 if (err)
6465 return err;
6466
6467 *ppos += ret;
6468
6469 return ret;
6470 }
6471
6472 static ssize_t
6473 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6474 size_t cnt, loff_t *ppos)
6475 {
6476 char buf[64];
6477 int r;
6478
6479 r = snprintf(buf, sizeof(buf), "%ld\n",
6480 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6481 if (r > sizeof(buf))
6482 r = sizeof(buf);
6483 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6484 }
6485
6486 static ssize_t
6487 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6488 size_t cnt, loff_t *ppos)
6489 {
6490 unsigned long val;
6491 int ret;
6492
6493 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6494 if (ret)
6495 return ret;
6496
6497 *ptr = val * 1000;
6498
6499 return cnt;
6500 }
6501
6502 static ssize_t
6503 tracing_thresh_read(struct file *filp, char __user *ubuf,
6504 size_t cnt, loff_t *ppos)
6505 {
6506 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6507 }
6508
6509 static ssize_t
6510 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6511 size_t cnt, loff_t *ppos)
6512 {
6513 struct trace_array *tr = filp->private_data;
6514 int ret;
6515
6516 mutex_lock(&trace_types_lock);
6517 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6518 if (ret < 0)
6519 goto out;
6520
6521 if (tr->current_trace->update_thresh) {
6522 ret = tr->current_trace->update_thresh(tr);
6523 if (ret < 0)
6524 goto out;
6525 }
6526
6527 ret = cnt;
6528 out:
6529 mutex_unlock(&trace_types_lock);
6530
6531 return ret;
6532 }
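/*
 * Illustrative usage (not part of the kernel source): tracing_thresh is
 * written and read in microseconds; tracing_nsecs_write() above stores
 * the value internally in nanoseconds (val * 1000). For example, to only
 * record latencies above 100 usecs:
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 */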
6533
6534 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6535
6536 static ssize_t
6537 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6538 size_t cnt, loff_t *ppos)
6539 {
6540 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6541 }
6542
6543 static ssize_t
6544 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6545 size_t cnt, loff_t *ppos)
6546 {
6547 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6548 }
6549
6550 #endif
6551
6552 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6553 {
6554 struct trace_array *tr = inode->i_private;
6555 struct trace_iterator *iter;
6556 int ret;
6557
6558 ret = tracing_check_open_get_tr(tr);
6559 if (ret)
6560 return ret;
6561
6562 mutex_lock(&trace_types_lock);
6563
6564 /* create a buffer to store the information to pass to userspace */
6565 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6566 if (!iter) {
6567 ret = -ENOMEM;
6568 __trace_array_put(tr);
6569 goto out;
6570 }
6571
6572 trace_seq_init(&iter->seq);
6573 iter->trace = tr->current_trace;
6574
6575 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6576 ret = -ENOMEM;
6577 goto fail;
6578 }
6579
6580 /* trace pipe does not show start of buffer */
6581 cpumask_setall(iter->started);
6582
6583 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6584 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6585
6586 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6587 if (trace_clocks[tr->clock_id].in_ns)
6588 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6589
6590 iter->tr = tr;
6591 iter->array_buffer = &tr->array_buffer;
6592 iter->cpu_file = tracing_get_cpu(inode);
6593 mutex_init(&iter->mutex);
6594 filp->private_data = iter;
6595
6596 if (iter->trace->pipe_open)
6597 iter->trace->pipe_open(iter);
6598
6599 nonseekable_open(inode, filp);
6600
6601 tr->trace_ref++;
6602 out:
6603 mutex_unlock(&trace_types_lock);
6604 return ret;
6605
6606 fail:
6607 kfree(iter);
6608 __trace_array_put(tr);
6609 mutex_unlock(&trace_types_lock);
6610 return ret;
6611 }
6612
6613 static int tracing_release_pipe(struct inode *inode, struct file *file)
6614 {
6615 struct trace_iterator *iter = file->private_data;
6616 struct trace_array *tr = inode->i_private;
6617
6618 mutex_lock(&trace_types_lock);
6619
6620 tr->trace_ref--;
6621
6622 if (iter->trace->pipe_close)
6623 iter->trace->pipe_close(iter);
6624
6625 mutex_unlock(&trace_types_lock);
6626
6627 free_cpumask_var(iter->started);
6628 mutex_destroy(&iter->mutex);
6629 kfree(iter);
6630
6631 trace_array_put(tr);
6632
6633 return 0;
6634 }
6635
6636 static __poll_t
6637 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6638 {
6639 struct trace_array *tr = iter->tr;
6640
6641 /* Iterators are static, they should be filled or empty */
6642 if (trace_buffer_iter(iter, iter->cpu_file))
6643 return EPOLLIN | EPOLLRDNORM;
6644
6645 if (tr->trace_flags & TRACE_ITER_BLOCK)
6646 /*
6647 * Always select as readable when in blocking mode
6648 */
6649 return EPOLLIN | EPOLLRDNORM;
6650 else
6651 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6652 filp, poll_table);
6653 }
6654
6655 static __poll_t
6656 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6657 {
6658 struct trace_iterator *iter = filp->private_data;
6659
6660 return trace_poll(iter, filp, poll_table);
6661 }
6662
6663 /* Must be called with iter->mutex held. */
6664 static int tracing_wait_pipe(struct file *filp)
6665 {
6666 struct trace_iterator *iter = filp->private_data;
6667 int ret;
6668
6669 while (trace_empty(iter)) {
6670
6671 if ((filp->f_flags & O_NONBLOCK)) {
6672 return -EAGAIN;
6673 }
6674
6675 /*
6676 * We block until we read something and tracing is disabled.
6677 * We still block if tracing is disabled, but we have never
6678 * read anything. This allows a user to cat this file, and
6679 * then enable tracing. But after we have read something,
6680 * we give an EOF when tracing is again disabled.
6681 *
6682 * iter->pos will be 0 if we haven't read anything.
6683 */
6684 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6685 break;
6686
6687 mutex_unlock(&iter->mutex);
6688
6689 ret = wait_on_pipe(iter, 0);
6690
6691 mutex_lock(&iter->mutex);
6692
6693 if (ret)
6694 return ret;
6695 }
6696
6697 return 1;
6698 }
6699
6700 /*
6701 * Consumer reader.
6702 */
6703 static ssize_t
6704 tracing_read_pipe(struct file *filp, char __user *ubuf,
6705 size_t cnt, loff_t *ppos)
6706 {
6707 struct trace_iterator *iter = filp->private_data;
6708 ssize_t sret;
6709
6710 /*
6711 * Avoid more than one consumer on a single file descriptor
6712 * This is just a matter of traces coherency, the ring buffer itself
6713 * is protected.
6714 */
6715 mutex_lock(&iter->mutex);
6716
6717 /* return any leftover data */
6718 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6719 if (sret != -EBUSY)
6720 goto out;
6721
6722 trace_seq_init(&iter->seq);
6723
6724 if (iter->trace->read) {
6725 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6726 if (sret)
6727 goto out;
6728 }
6729
6730 waitagain:
6731 sret = tracing_wait_pipe(filp);
6732 if (sret <= 0)
6733 goto out;
6734
6735 /* stop when tracing is finished */
6736 if (trace_empty(iter)) {
6737 sret = 0;
6738 goto out;
6739 }
6740
6741 if (cnt >= PAGE_SIZE)
6742 cnt = PAGE_SIZE - 1;
6743
6744 /* reset all but tr, trace, and overruns */
6745 memset(&iter->seq, 0,
6746 sizeof(struct trace_iterator) -
6747 offsetof(struct trace_iterator, seq));
6748 cpumask_clear(iter->started);
6749 trace_seq_init(&iter->seq);
6750 iter->pos = -1;
6751
6752 trace_event_read_lock();
6753 trace_access_lock(iter->cpu_file);
6754 while (trace_find_next_entry_inc(iter) != NULL) {
6755 enum print_line_t ret;
6756 int save_len = iter->seq.seq.len;
6757
6758 ret = print_trace_line(iter);
6759 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6760 /* don't print partial lines */
6761 iter->seq.seq.len = save_len;
6762 break;
6763 }
6764 if (ret != TRACE_TYPE_NO_CONSUME)
6765 trace_consume(iter);
6766
6767 if (trace_seq_used(&iter->seq) >= cnt)
6768 break;
6769
6770 /*
6771 * Setting the full flag means we reached the trace_seq buffer
6772 * size and we should leave by partial output condition above.
6773 * One of the trace_seq_* functions is not used properly.
6774 */
6775 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6776 iter->ent->type);
6777 }
6778 trace_access_unlock(iter->cpu_file);
6779 trace_event_read_unlock();
6780
6781 /* Now copy what we have to the user */
6782 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6783 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6784 trace_seq_init(&iter->seq);
6785
6786 /*
6787 * If there was nothing to send to user, in spite of consuming trace
6788 * entries, go back to wait for more entries.
6789 */
6790 if (sret == -EBUSY)
6791 goto waitagain;
6792
6793 out:
6794 mutex_unlock(&iter->mutex);
6795
6796 return sret;
6797 }
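/*
 * Illustrative usage (not part of the kernel source): tracing_read_pipe()
 * implements the consuming read typically exposed as the "trace_pipe"
 * file. Unlike "trace", a read removes entries from the ring buffer and
 * blocks for new data (unless the file was opened with O_NONBLOCK, in
 * which case tracing_wait_pipe() returns -EAGAIN):
 *
 *   cat /sys/kernel/tracing/trace_pipe
 */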
6798
6799 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6800 unsigned int idx)
6801 {
6802 __free_page(spd->pages[idx]);
6803 }
6804
6805 static size_t
6806 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6807 {
6808 size_t count;
6809 int save_len;
6810 int ret;
6811
6812 /* Seq buffer is page-sized, exactly what we need. */
6813 for (;;) {
6814 save_len = iter->seq.seq.len;
6815 ret = print_trace_line(iter);
6816
6817 if (trace_seq_has_overflowed(&iter->seq)) {
6818 iter->seq.seq.len = save_len;
6819 break;
6820 }
6821
6822 /*
6823 * This should not be hit, because it should only
6824 * be set if the iter->seq overflowed. But check it
6825 * anyway to be safe.
6826 */
6827 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6828 iter->seq.seq.len = save_len;
6829 break;
6830 }
6831
6832 count = trace_seq_used(&iter->seq) - save_len;
6833 if (rem < count) {
6834 rem = 0;
6835 iter->seq.seq.len = save_len;
6836 break;
6837 }
6838
6839 if (ret != TRACE_TYPE_NO_CONSUME)
6840 trace_consume(iter);
6841 rem -= count;
6842 if (!trace_find_next_entry_inc(iter)) {
6843 rem = 0;
6844 iter->ent = NULL;
6845 break;
6846 }
6847 }
6848
6849 return rem;
6850 }
6851
6852 static ssize_t tracing_splice_read_pipe(struct file *filp,
6853 loff_t *ppos,
6854 struct pipe_inode_info *pipe,
6855 size_t len,
6856 unsigned int flags)
6857 {
6858 struct page *pages_def[PIPE_DEF_BUFFERS];
6859 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6860 struct trace_iterator *iter = filp->private_data;
6861 struct splice_pipe_desc spd = {
6862 .pages = pages_def,
6863 .partial = partial_def,
6864 .nr_pages = 0, /* This gets updated below. */
6865 .nr_pages_max = PIPE_DEF_BUFFERS,
6866 .ops = &default_pipe_buf_ops,
6867 .spd_release = tracing_spd_release_pipe,
6868 };
6869 ssize_t ret;
6870 size_t rem;
6871 unsigned int i;
6872
6873 if (splice_grow_spd(pipe, &spd))
6874 return -ENOMEM;
6875
6876 mutex_lock(&iter->mutex);
6877
6878 if (iter->trace->splice_read) {
6879 ret = iter->trace->splice_read(iter, filp,
6880 ppos, pipe, len, flags);
6881 if (ret)
6882 goto out_err;
6883 }
6884
6885 ret = tracing_wait_pipe(filp);
6886 if (ret <= 0)
6887 goto out_err;
6888
6889 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6890 ret = -EFAULT;
6891 goto out_err;
6892 }
6893
6894 trace_event_read_lock();
6895 trace_access_lock(iter->cpu_file);
6896
6897 /* Fill as many pages as possible. */
6898 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6899 spd.pages[i] = alloc_page(GFP_KERNEL);
6900 if (!spd.pages[i])
6901 break;
6902
6903 rem = tracing_fill_pipe_page(rem, iter);
6904
6905 /* Copy the data into the page, so we can start over. */
6906 ret = trace_seq_to_buffer(&iter->seq,
6907 page_address(spd.pages[i]),
6908 trace_seq_used(&iter->seq));
6909 if (ret < 0) {
6910 __free_page(spd.pages[i]);
6911 break;
6912 }
6913 spd.partial[i].offset = 0;
6914 spd.partial[i].len = trace_seq_used(&iter->seq);
6915
6916 trace_seq_init(&iter->seq);
6917 }
6918
6919 trace_access_unlock(iter->cpu_file);
6920 trace_event_read_unlock();
6921 mutex_unlock(&iter->mutex);
6922
6923 spd.nr_pages = i;
6924
6925 if (i)
6926 ret = splice_to_pipe(pipe, &spd);
6927 else
6928 ret = 0;
6929 out:
6930 splice_shrink_spd(&spd);
6931 return ret;
6932
6933 out_err:
6934 mutex_unlock(&iter->mutex);
6935 goto out;
6936 }
6937
6938 static ssize_t
6939 tracing_entries_read(struct file *filp, char __user *ubuf,
6940 size_t cnt, loff_t *ppos)
6941 {
6942 struct inode *inode = file_inode(filp);
6943 struct trace_array *tr = inode->i_private;
6944 int cpu = tracing_get_cpu(inode);
6945 char buf[64];
6946 int r = 0;
6947 ssize_t ret;
6948
6949 mutex_lock(&trace_types_lock);
6950
6951 if (cpu == RING_BUFFER_ALL_CPUS) {
6952 int cpu, buf_size_same;
6953 unsigned long size;
6954
6955 size = 0;
6956 buf_size_same = 1;
6957 /* check if all cpu sizes are same */
6958 for_each_tracing_cpu(cpu) {
6959 /* fill in the size from first enabled cpu */
6960 if (size == 0)
6961 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6962 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6963 buf_size_same = 0;
6964 break;
6965 }
6966 }
6967
6968 if (buf_size_same) {
6969 if (!ring_buffer_expanded)
6970 r = sprintf(buf, "%lu (expanded: %lu)\n",
6971 size >> 10,
6972 trace_buf_size >> 10);
6973 else
6974 r = sprintf(buf, "%lu\n", size >> 10);
6975 } else
6976 r = sprintf(buf, "X\n");
6977 } else
6978 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6979
6980 mutex_unlock(&trace_types_lock);
6981
6982 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6983 return ret;
6984 }
6985
6986 static ssize_t
6987 tracing_entries_write(struct file *filp, const char __user *ubuf,
6988 size_t cnt, loff_t *ppos)
6989 {
6990 struct inode *inode = file_inode(filp);
6991 struct trace_array *tr = inode->i_private;
6992 unsigned long val;
6993 int ret;
6994
6995 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6996 if (ret)
6997 return ret;
6998
6999 /* must have at least 1 entry */
7000 if (!val)
7001 return -EINVAL;
7002
7003 /* value is in KB */
7004 val <<= 10;
7005 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7006 if (ret < 0)
7007 return ret;
7008
7009 *ppos += cnt;
7010
7011 return cnt;
7012 }
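/*
 * Illustrative usage (not part of the kernel source): the entries files
 * are typically exposed as "buffer_size_kb" (all CPUs) and
 * "per_cpu/cpuN/buffer_size_kb". The value written is in KB per CPU, as
 * the write handler above shifts it left by 10 before resizing:
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   cat /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */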
7013
7014 static ssize_t
7015 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7016 size_t cnt, loff_t *ppos)
7017 {
7018 struct trace_array *tr = filp->private_data;
7019 char buf[64];
7020 int r, cpu;
7021 unsigned long size = 0, expanded_size = 0;
7022
7023 mutex_lock(&trace_types_lock);
7024 for_each_tracing_cpu(cpu) {
7025 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7026 if (!ring_buffer_expanded)
7027 expanded_size += trace_buf_size >> 10;
7028 }
7029 if (ring_buffer_expanded)
7030 r = sprintf(buf, "%lu\n", size);
7031 else
7032 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7033 mutex_unlock(&trace_types_lock);
7034
7035 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7036 }
7037
7038 static ssize_t
7039 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7040 size_t cnt, loff_t *ppos)
7041 {
7042 /*
7043 * There is no need to read what the user has written; this function
7044 * exists just to make sure that there is no error when "echo" is used
7045 */
7046
7047 *ppos += cnt;
7048
7049 return cnt;
7050 }
7051
7052 static int
7053 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7054 {
7055 struct trace_array *tr = inode->i_private;
7056
7057 /* disable tracing ? */
7058 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7059 tracer_tracing_off(tr);
7060 /* resize the ring buffer to 0 */
7061 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7062
7063 trace_array_put(tr);
7064
7065 return 0;
7066 }
7067
7068 static ssize_t
7069 tracing_mark_write(struct file *filp, const char __user *ubuf,
7070 size_t cnt, loff_t *fpos)
7071 {
7072 struct trace_array *tr = filp->private_data;
7073 struct ring_buffer_event *event;
7074 enum event_trigger_type tt = ETT_NONE;
7075 struct trace_buffer *buffer;
7076 struct print_entry *entry;
7077 ssize_t written;
7078 int size;
7079 int len;
7080
7081 /* Used in tracing_mark_raw_write() as well */
7082 #define FAULTED_STR "<faulted>"
7083 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7084
7085 if (tracing_disabled)
7086 return -EINVAL;
7087
7088 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7089 return -EINVAL;
7090
7091 if (cnt > TRACE_BUF_SIZE)
7092 cnt = TRACE_BUF_SIZE;
7093
7094 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7095
7096 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7097
7098 /* If less than "<faulted>", then make sure we can still add that */
7099 if (cnt < FAULTED_SIZE)
7100 size += FAULTED_SIZE - cnt;
7101
7102 buffer = tr->array_buffer.buffer;
7103 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7104 tracing_gen_ctx());
7105 if (unlikely(!event))
7106 /* Ring buffer disabled, return as if not open for write */
7107 return -EBADF;
7108
7109 entry = ring_buffer_event_data(event);
7110 entry->ip = _THIS_IP_;
7111
7112 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7113 if (len) {
7114 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7115 cnt = FAULTED_SIZE;
7116 written = -EFAULT;
7117 } else
7118 written = cnt;
7119
7120 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7121 /* do not add \n before testing triggers, but add \0 */
7122 entry->buf[cnt] = '\0';
7123 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7124 }
7125
7126 if (entry->buf[cnt - 1] != '\n') {
7127 entry->buf[cnt] = '\n';
7128 entry->buf[cnt + 1] = '\0';
7129 } else
7130 entry->buf[cnt] = '\0';
7131
7132 if (static_branch_unlikely(&trace_marker_exports_enabled))
7133 ftrace_exports(event, TRACE_EXPORT_MARKER);
7134 __buffer_unlock_commit(buffer, event);
7135
7136 if (tt)
7137 event_triggers_post_call(tr->trace_marker_file, tt);
7138
7139 if (written > 0)
7140 *fpos += written;
7141
7142 return written;
7143 }
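/*
 * Illustrative usage (not part of the kernel source): tracing_mark_write()
 * backs the "trace_marker" file, letting user space inject free-form text
 * into the trace as TRACE_PRINT events (a trailing newline is added if
 * missing, as handled above):
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */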
7144
7145 /* Limit it for now to 3K (including tag) */
7146 #define RAW_DATA_MAX_SIZE (1024*3)
7147
7148 static ssize_t
7149 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7150 size_t cnt, loff_t *fpos)
7151 {
7152 struct trace_array *tr = filp->private_data;
7153 struct ring_buffer_event *event;
7154 struct trace_buffer *buffer;
7155 struct raw_data_entry *entry;
7156 ssize_t written;
7157 int size;
7158 int len;
7159
7160 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7161
7162 if (tracing_disabled)
7163 return -EINVAL;
7164
7165 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7166 return -EINVAL;
7167
7168 /* The marker must at least have a tag id */
7169 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7170 return -EINVAL;
7171
7172 if (cnt > TRACE_BUF_SIZE)
7173 cnt = TRACE_BUF_SIZE;
7174
7175 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7176
7177 size = sizeof(*entry) + cnt;
7178 if (cnt < FAULT_SIZE_ID)
7179 size += FAULT_SIZE_ID - cnt;
7180
7181 buffer = tr->array_buffer.buffer;
7182 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7183 tracing_gen_ctx());
7184 if (!event)
7185 /* Ring buffer disabled, return as if not open for write */
7186 return -EBADF;
7187
7188 entry = ring_buffer_event_data(event);
7189
7190 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7191 if (len) {
7192 entry->id = -1;
7193 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7194 written = -EFAULT;
7195 } else
7196 written = cnt;
7197
7198 __buffer_unlock_commit(buffer, event);
7199
7200 if (written > 0)
7201 *fpos += written;
7202
7203 return written;
7204 }
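/*
 * Illustrative user-space sketch (not part of the kernel source) for the
 * "trace_marker_raw" file handled above: the payload must begin with a
 * 4-byte tag id (the "unsigned int" minimum checked against cnt above),
 * followed by arbitrary binary data, up to RAW_DATA_MAX_SIZE bytes total:
 *
 *   struct { unsigned int id; char payload[32]; } raw = { .id = 42 };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &raw, sizeof(raw));
 *   close(fd);
 */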
7205
7206 static int tracing_clock_show(struct seq_file *m, void *v)
7207 {
7208 struct trace_array *tr = m->private;
7209 int i;
7210
7211 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7212 seq_printf(m,
7213 "%s%s%s%s", i ? " " : "",
7214 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7215 i == tr->clock_id ? "]" : "");
7216 seq_putc(m, '\n');
7217
7218 return 0;
7219 }
7220
7221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7222 {
7223 int i;
7224
7225 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7226 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7227 break;
7228 }
7229 if (i == ARRAY_SIZE(trace_clocks))
7230 return -EINVAL;
7231
7232 mutex_lock(&trace_types_lock);
7233
7234 tr->clock_id = i;
7235
7236 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7237
7238 /*
7239 * New clock may not be consistent with the previous clock.
7240 * Reset the buffer so that it doesn't have incomparable timestamps.
7241 */
7242 tracing_reset_online_cpus(&tr->array_buffer);
7243
7244 #ifdef CONFIG_TRACER_MAX_TRACE
7245 if (tr->max_buffer.buffer)
7246 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7247 tracing_reset_online_cpus(&tr->max_buffer);
7248 #endif
7249
7250 mutex_unlock(&trace_types_lock);
7251
7252 return 0;
7253 }
7254
7255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7256 size_t cnt, loff_t *fpos)
7257 {
7258 struct seq_file *m = filp->private_data;
7259 struct trace_array *tr = m->private;
7260 char buf[64];
7261 const char *clockstr;
7262 int ret;
7263
7264 if (cnt >= sizeof(buf))
7265 return -EINVAL;
7266
7267 if (copy_from_user(buf, ubuf, cnt))
7268 return -EFAULT;
7269
7270 buf[cnt] = 0;
7271
7272 clockstr = strstrip(buf);
7273
7274 ret = tracing_set_clock(tr, clockstr);
7275 if (ret)
7276 return ret;
7277
7278 *fpos += cnt;
7279
7280 return cnt;
7281 }
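/*
 * Illustrative usage (not part of the kernel source): tracing_clock_show()
 * brackets the currently selected clock, and writing one of the listed
 * names (e.g. "mono", if it appears in the list) switches it, which also
 * resets the buffers per the comment in tracing_set_clock() above:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo mono > /sys/kernel/tracing/trace_clock
 */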
7282
7283 static int tracing_clock_open(struct inode *inode, struct file *file)
7284 {
7285 struct trace_array *tr = inode->i_private;
7286 int ret;
7287
7288 ret = tracing_check_open_get_tr(tr);
7289 if (ret)
7290 return ret;
7291
7292 ret = single_open(file, tracing_clock_show, inode->i_private);
7293 if (ret < 0)
7294 trace_array_put(tr);
7295
7296 return ret;
7297 }
7298
7299 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7300 {
7301 struct trace_array *tr = m->private;
7302
7303 mutex_lock(&trace_types_lock);
7304
7305 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7306 seq_puts(m, "delta [absolute]\n");
7307 else
7308 seq_puts(m, "[delta] absolute\n");
7309
7310 mutex_unlock(&trace_types_lock);
7311
7312 return 0;
7313 }
7314
7315 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7316 {
7317 struct trace_array *tr = inode->i_private;
7318 int ret;
7319
7320 ret = tracing_check_open_get_tr(tr);
7321 if (ret)
7322 return ret;
7323
7324 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7325 if (ret < 0)
7326 trace_array_put(tr);
7327
7328 return ret;
7329 }
7330
7331 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7332 {
7333 if (rbe == this_cpu_read(trace_buffered_event))
7334 return ring_buffer_time_stamp(buffer);
7335
7336 return ring_buffer_event_time_stamp(buffer, rbe);
7337 }
7338
7339 /*
7340 * Set or disable using the per CPU trace_buffered_event when possible.
7341 */
7342 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7343 {
7344 int ret = 0;
7345
7346 mutex_lock(&trace_types_lock);
7347
7348 if (set && tr->no_filter_buffering_ref++)
7349 goto out;
7350
7351 if (!set) {
7352 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7353 ret = -EINVAL;
7354 goto out;
7355 }
7356
7357 --tr->no_filter_buffering_ref;
7358 }
7359 out:
7360 mutex_unlock(&trace_types_lock);
7361
7362 return ret;
7363 }
7364
7365 struct ftrace_buffer_info {
7366 struct trace_iterator iter;
7367 void *spare;
7368 unsigned int spare_cpu;
7369 unsigned int read;
7370 };
7371
7372 #ifdef CONFIG_TRACER_SNAPSHOT
7373 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7374 {
7375 struct trace_array *tr = inode->i_private;
7376 struct trace_iterator *iter;
7377 struct seq_file *m;
7378 int ret;
7379
7380 ret = tracing_check_open_get_tr(tr);
7381 if (ret)
7382 return ret;
7383
7384 if (file->f_mode & FMODE_READ) {
7385 iter = __tracing_open(inode, file, true);
7386 if (IS_ERR(iter))
7387 ret = PTR_ERR(iter);
7388 } else {
7389 /* Writes still need the seq_file to hold the private data */
7390 ret = -ENOMEM;
7391 m = kzalloc(sizeof(*m), GFP_KERNEL);
7392 if (!m)
7393 goto out;
7394 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7395 if (!iter) {
7396 kfree(m);
7397 goto out;
7398 }
7399 ret = 0;
7400
7401 iter->tr = tr;
7402 iter->array_buffer = &tr->max_buffer;
7403 iter->cpu_file = tracing_get_cpu(inode);
7404 m->private = iter;
7405 file->private_data = m;
7406 }
7407 out:
7408 if (ret < 0)
7409 trace_array_put(tr);
7410
7411 return ret;
7412 }
7413
7414 static ssize_t
7415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7416 loff_t *ppos)
7417 {
7418 struct seq_file *m = filp->private_data;
7419 struct trace_iterator *iter = m->private;
7420 struct trace_array *tr = iter->tr;
7421 unsigned long val;
7422 int ret;
7423
7424 ret = tracing_update_buffers();
7425 if (ret < 0)
7426 return ret;
7427
7428 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7429 if (ret)
7430 return ret;
7431
7432 mutex_lock(&trace_types_lock);
7433
7434 if (tr->current_trace->use_max_tr) {
7435 ret = -EBUSY;
7436 goto out;
7437 }
7438
7439 arch_spin_lock(&tr->max_lock);
7440 if (tr->cond_snapshot)
7441 ret = -EBUSY;
7442 arch_spin_unlock(&tr->max_lock);
7443 if (ret)
7444 goto out;
7445
7446 switch (val) {
7447 case 0:
7448 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7449 ret = -EINVAL;
7450 break;
7451 }
7452 if (tr->allocated_snapshot)
7453 free_snapshot(tr);
7454 break;
7455 case 1:
7456 /* Only allow per-cpu swap if the ring buffer supports it */
7457 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7458 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7459 ret = -EINVAL;
7460 break;
7461 }
7462 #endif
7463 if (tr->allocated_snapshot)
7464 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7465 &tr->array_buffer, iter->cpu_file);
7466 else
7467 ret = tracing_alloc_snapshot_instance(tr);
7468 if (ret < 0)
7469 break;
7470 local_irq_disable();
7471 /* Now, we're going to swap */
7472 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7473 update_max_tr(tr, current, smp_processor_id(), NULL);
7474 else
7475 update_max_tr_single(tr, current, iter->cpu_file);
7476 local_irq_enable();
7477 break;
7478 default:
7479 if (tr->allocated_snapshot) {
7480 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7481 tracing_reset_online_cpus(&tr->max_buffer);
7482 else
7483 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7484 }
7485 break;
7486 }
7487
7488 if (ret >= 0) {
7489 *ppos += cnt;
7490 ret = cnt;
7491 }
7492 out:
7493 mutex_unlock(&trace_types_lock);
7494 return ret;
7495 }
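/*
 * Illustrative usage (not part of the kernel source), matching the switch
 * in tracing_snapshot_write() above: writing 1 to the "snapshot" file
 * allocates the snapshot buffer if needed and takes a snapshot, 0 frees
 * the snapshot buffer, and any other value clears its contents:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot
 *   cat /sys/kernel/tracing/snapshot
 *   echo 0 > /sys/kernel/tracing/snapshot
 */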
7496
7497 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7498 {
7499 struct seq_file *m = file->private_data;
7500 int ret;
7501
7502 ret = tracing_release(inode, file);
7503
7504 if (file->f_mode & FMODE_READ)
7505 return ret;
7506
7507 /* If write only, the seq_file is just a stub */
7508 if (m)
7509 kfree(m->private);
7510 kfree(m);
7511
7512 return 0;
7513 }
7514
7515 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7516 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7517 size_t count, loff_t *ppos);
7518 static int tracing_buffers_release(struct inode *inode, struct file *file);
7519 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7520 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7521
7522 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7523 {
7524 struct ftrace_buffer_info *info;
7525 int ret;
7526
7527 /* The following checks for tracefs lockdown */
7528 ret = tracing_buffers_open(inode, filp);
7529 if (ret < 0)
7530 return ret;
7531
7532 info = filp->private_data;
7533
7534 if (info->iter.trace->use_max_tr) {
7535 tracing_buffers_release(inode, filp);
7536 return -EBUSY;
7537 }
7538
7539 info->iter.snapshot = true;
7540 info->iter.array_buffer = &info->iter.tr->max_buffer;
7541
7542 return ret;
7543 }
7544
7545 #endif /* CONFIG_TRACER_SNAPSHOT */
7546
7547
7548 static const struct file_operations tracing_thresh_fops = {
7549 .open = tracing_open_generic,
7550 .read = tracing_thresh_read,
7551 .write = tracing_thresh_write,
7552 .llseek = generic_file_llseek,
7553 };
7554
7555 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7556 static const struct file_operations tracing_max_lat_fops = {
7557 .open = tracing_open_generic,
7558 .read = tracing_max_lat_read,
7559 .write = tracing_max_lat_write,
7560 .llseek = generic_file_llseek,
7561 };
7562 #endif
7563
7564 static const struct file_operations set_tracer_fops = {
7565 .open = tracing_open_generic,
7566 .read = tracing_set_trace_read,
7567 .write = tracing_set_trace_write,
7568 .llseek = generic_file_llseek,
7569 };
7570
7571 static const struct file_operations tracing_pipe_fops = {
7572 .open = tracing_open_pipe,
7573 .poll = tracing_poll_pipe,
7574 .read = tracing_read_pipe,
7575 .splice_read = tracing_splice_read_pipe,
7576 .release = tracing_release_pipe,
7577 .llseek = no_llseek,
7578 };
7579
7580 static const struct file_operations tracing_entries_fops = {
7581 .open = tracing_open_generic_tr,
7582 .read = tracing_entries_read,
7583 .write = tracing_entries_write,
7584 .llseek = generic_file_llseek,
7585 .release = tracing_release_generic_tr,
7586 };
7587
7588 static const struct file_operations tracing_total_entries_fops = {
7589 .open = tracing_open_generic_tr,
7590 .read = tracing_total_entries_read,
7591 .llseek = generic_file_llseek,
7592 .release = tracing_release_generic_tr,
7593 };
7594
7595 static const struct file_operations tracing_free_buffer_fops = {
7596 .open = tracing_open_generic_tr,
7597 .write = tracing_free_buffer_write,
7598 .release = tracing_free_buffer_release,
7599 };
7600
7601 static const struct file_operations tracing_mark_fops = {
7602 .open = tracing_open_generic_tr,
7603 .write = tracing_mark_write,
7604 .llseek = generic_file_llseek,
7605 .release = tracing_release_generic_tr,
7606 };
7607
7608 static const struct file_operations tracing_mark_raw_fops = {
7609 .open = tracing_open_generic_tr,
7610 .write = tracing_mark_raw_write,
7611 .llseek = generic_file_llseek,
7612 .release = tracing_release_generic_tr,
7613 };
7614
7615 static const struct file_operations trace_clock_fops = {
7616 .open = tracing_clock_open,
7617 .read = seq_read,
7618 .llseek = seq_lseek,
7619 .release = tracing_single_release_tr,
7620 .write = tracing_clock_write,
7621 };
7622
7623 static const struct file_operations trace_time_stamp_mode_fops = {
7624 .open = tracing_time_stamp_mode_open,
7625 .read = seq_read,
7626 .llseek = seq_lseek,
7627 .release = tracing_single_release_tr,
7628 };
7629
7630 #ifdef CONFIG_TRACER_SNAPSHOT
7631 static const struct file_operations snapshot_fops = {
7632 .open = tracing_snapshot_open,
7633 .read = seq_read,
7634 .write = tracing_snapshot_write,
7635 .llseek = tracing_lseek,
7636 .release = tracing_snapshot_release,
7637 };
7638
7639 static const struct file_operations snapshot_raw_fops = {
7640 .open = snapshot_raw_open,
7641 .read = tracing_buffers_read,
7642 .release = tracing_buffers_release,
7643 .splice_read = tracing_buffers_splice_read,
7644 .llseek = no_llseek,
7645 };
7646
7647 #endif /* CONFIG_TRACER_SNAPSHOT */
7648
7649 /*
7650 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7651 * @filp: The active open file structure
7652 * @ubuf: The userspace provided buffer holding the value to write
7653 * @cnt: The maximum number of bytes to write
7654 * @ppos: The current "file" position
7655 *
7656 * This function implements the write interface for a struct trace_min_max_param.
7657 * The filp->private_data must point to a trace_min_max_param structure that
7658 * defines where to write the value, the min and the max acceptable values,
7659 * and a lock to protect the write.
7660 */
7661 static ssize_t
7662 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7663 {
7664 struct trace_min_max_param *param = filp->private_data;
7665 u64 val;
7666 int err;
7667
7668 if (!param)
7669 return -EFAULT;
7670
7671 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7672 if (err)
7673 return err;
7674
7675 if (param->lock)
7676 mutex_lock(param->lock);
7677
7678 if (param->min && val < *param->min)
7679 err = -EINVAL;
7680
7681 if (param->max && val > *param->max)
7682 err = -EINVAL;
7683
7684 if (!err)
7685 *param->val = val;
7686
7687 if (param->lock)
7688 mutex_unlock(param->lock);
7689
7690 if (err)
7691 return err;
7692
7693 return cnt;
7694 }
7695
7696 /*
7697 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7698 * @filp: The active open file structure
7699 * @ubuf: The userspace provided buffer to read value into
7700 * @cnt: The maximum number of bytes to read
7701 * @ppos: The current "file" position
7702 *
7703 * This function implements the read interface for a struct trace_min_max_param.
7704 * The filp->private_data must point to a trace_min_max_param struct with valid
7705 * data.
7706 */
7707 static ssize_t
7708 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7709 {
7710 struct trace_min_max_param *param = filp->private_data;
7711 char buf[U64_STR_SIZE];
7712 int len;
7713 u64 val;
7714
7715 if (!param)
7716 return -EFAULT;
7717
7718 val = *param->val;
7719
7720 if (cnt > sizeof(buf))
7721 cnt = sizeof(buf);
7722
7723 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7724
7725 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7726 }
7727
7728 const struct file_operations trace_min_max_fops = {
7729 .open = tracing_open_generic,
7730 .read = trace_min_max_read,
7731 .write = trace_min_max_write,
7732 };
7733
7734 #define TRACING_LOG_ERRS_MAX 8
7735 #define TRACING_LOG_LOC_MAX 128
7736
7737 #define CMD_PREFIX " Command: "
7738
7739 struct err_info {
7740 const char **errs; /* ptr to loc-specific array of err strings */
7741 u8 type; /* index into errs -> specific err string */
7742 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7743 u64 ts;
7744 };
7745
7746 struct tracing_log_err {
7747 struct list_head list;
7748 struct err_info info;
7749 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7750 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7751 };
7752
7753 static DEFINE_MUTEX(tracing_err_log_lock);
7754
7755 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7756 {
7757 struct tracing_log_err *err;
7758
7759 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7760 err = kzalloc(sizeof(*err), GFP_KERNEL);
7761 if (!err)
7762 err = ERR_PTR(-ENOMEM);
7763 else
7764 tr->n_err_log_entries++;
7765
7766 return err;
7767 }
7768
7769 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7770 list_del(&err->list);
7771
7772 return err;
7773 }
7774
7775 /**
7776 * err_pos - find the position of a string within a command for error careting
7777 * @cmd: The tracing command that caused the error
7778 * @str: The string to position the caret at within @cmd
7779 *
7780 * Finds the position of the first occurrence of @str within @cmd. The
7781 * return value can be passed to tracing_log_err() for caret placement
7782 * within @cmd.
7783 *
7784 * Returns the index within @cmd of the first occurrence of @str or 0
7785 * if @str was not found.
7786 */
7787 unsigned int err_pos(char *cmd, const char *str)
7788 {
7789 char *found;
7790
7791 if (WARN_ON(!strlen(cmd)))
7792 return 0;
7793
7794 found = strstr(cmd, str);
7795 if (found)
7796 return found - cmd;
7797
7798 return 0;
7799 }
7800
7801 /**
7802 * tracing_log_err - write an error to the tracing error log
7803 * @tr: The associated trace array for the error (NULL for top level array)
7804 * @loc: A string describing where the error occurred
7805 * @cmd: The tracing command that caused the error
7806 * @errs: The array of loc-specific static error strings
7807 * @type: The index into errs[], which produces the specific static err string
7808 * @pos: The position the caret should be placed in the cmd
7809 *
7810 * Writes an error into tracing/error_log of the form:
7811 *
7812 * <loc>: error: <text>
7813 * Command: <cmd>
7814 * ^
7815 *
7816 * tracing/error_log is a small log file containing the last
7817 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7818 * unless there has been a tracing error, and the error log can be
7819 * cleared and have its memory freed by writing the empty string in
7820 * truncation mode to it i.e. echo > tracing/error_log.
7821 *
7822 * NOTE: the @errs array along with the @type param are used to
7823 * produce a static error string - this string is not copied and saved
7824 * when the error is logged - only a pointer to it is saved. See
7825 * existing callers for examples of how static strings are typically
7826 * defined for use with tracing_log_err().
7827 */
7828 void tracing_log_err(struct trace_array *tr,
7829 const char *loc, const char *cmd,
7830 const char **errs, u8 type, u8 pos)
7831 {
7832 struct tracing_log_err *err;
7833
7834 if (!tr)
7835 tr = &global_trace;
7836
7837 mutex_lock(&tracing_err_log_lock);
7838 err = get_tracing_log_err(tr);
7839 if (PTR_ERR(err) == -ENOMEM) {
7840 mutex_unlock(&tracing_err_log_lock);
7841 return;
7842 }
7843
7844 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7845 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7846
7847 err->info.errs = errs;
7848 err->info.type = type;
7849 err->info.pos = pos;
7850 err->info.ts = local_clock();
7851
7852 list_add_tail(&err->list, &tr->err_log);
7853 mutex_unlock(&tracing_err_log_lock);
7854 }
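/*
 * Illustrative usage (not part of the kernel source), per the
 * tracing_log_err() kernel-doc above: the last TRACING_LOG_ERRS_MAX
 * errors can be read from "error_log", and truncating the file clears
 * them and frees their memory:
 *
 *   cat /sys/kernel/tracing/error_log
 *   echo > /sys/kernel/tracing/error_log
 */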
7855
7856 static void clear_tracing_err_log(struct trace_array *tr)
7857 {
7858 struct tracing_log_err *err, *next;
7859
7860 mutex_lock(&tracing_err_log_lock);
7861 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7862 list_del(&err->list);
7863 kfree(err);
7864 }
7865
7866 tr->n_err_log_entries = 0;
7867 mutex_unlock(&tracing_err_log_lock);
7868 }
7869
7870 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7871 {
7872 struct trace_array *tr = m->private;
7873
7874 mutex_lock(&tracing_err_log_lock);
7875
7876 return seq_list_start(&tr->err_log, *pos);
7877 }
7878
7879 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7880 {
7881 struct trace_array *tr = m->private;
7882
7883 return seq_list_next(v, &tr->err_log, pos);
7884 }
7885
7886 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7887 {
7888 mutex_unlock(&tracing_err_log_lock);
7889 }
7890
7891 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7892 {
7893 u8 i;
7894
7895 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7896 seq_putc(m, ' ');
7897 for (i = 0; i < pos; i++)
7898 seq_putc(m, ' ');
7899 seq_puts(m, "^\n");
7900 }
7901
7902 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7903 {
7904 struct tracing_log_err *err = v;
7905
7906 if (err) {
7907 const char *err_text = err->info.errs[err->info.type];
7908 u64 sec = err->info.ts;
7909 u32 nsec;
7910
7911 nsec = do_div(sec, NSEC_PER_SEC);
7912 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7913 err->loc, err_text);
7914 seq_printf(m, "%s", err->cmd);
7915 tracing_err_log_show_pos(m, err->info.pos);
7916 }
7917
7918 return 0;
7919 }
7920
7921 static const struct seq_operations tracing_err_log_seq_ops = {
7922 .start = tracing_err_log_seq_start,
7923 .next = tracing_err_log_seq_next,
7924 .stop = tracing_err_log_seq_stop,
7925 .show = tracing_err_log_seq_show
7926 };
7927
7928 static int tracing_err_log_open(struct inode *inode, struct file *file)
7929 {
7930 struct trace_array *tr = inode->i_private;
7931 int ret = 0;
7932
7933 ret = tracing_check_open_get_tr(tr);
7934 if (ret)
7935 return ret;
7936
7937 /* If this file was opened for write, then erase contents */
7938 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7939 clear_tracing_err_log(tr);
7940
7941 if (file->f_mode & FMODE_READ) {
7942 ret = seq_open(file, &tracing_err_log_seq_ops);
7943 if (!ret) {
7944 struct seq_file *m = file->private_data;
7945 m->private = tr;
7946 } else {
7947 trace_array_put(tr);
7948 }
7949 }
7950 return ret;
7951 }
7952
7953 static ssize_t tracing_err_log_write(struct file *file,
7954 const char __user *buffer,
7955 size_t count, loff_t *ppos)
7956 {
7957 return count;
7958 }
7959
7960 static int tracing_err_log_release(struct inode *inode, struct file *file)
7961 {
7962 struct trace_array *tr = inode->i_private;
7963
7964 trace_array_put(tr);
7965
7966 if (file->f_mode & FMODE_READ)
7967 seq_release(inode, file);
7968
7969 return 0;
7970 }
7971
7972 static const struct file_operations tracing_err_log_fops = {
7973 .open = tracing_err_log_open,
7974 .write = tracing_err_log_write,
7975 .read = seq_read,
7976 .llseek = seq_lseek,
7977 .release = tracing_err_log_release,
7978 };
7979
7980 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7981 {
7982 struct trace_array *tr = inode->i_private;
7983 struct ftrace_buffer_info *info;
7984 int ret;
7985
7986 ret = tracing_check_open_get_tr(tr);
7987 if (ret)
7988 return ret;
7989
7990 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7991 if (!info) {
7992 trace_array_put(tr);
7993 return -ENOMEM;
7994 }
7995
7996 mutex_lock(&trace_types_lock);
7997
7998 info->iter.tr = tr;
7999 info->iter.cpu_file = tracing_get_cpu(inode);
8000 info->iter.trace = tr->current_trace;
8001 info->iter.array_buffer = &tr->array_buffer;
8002 info->spare = NULL;
8003 /* Force reading ring buffer for first read */
8004 info->read = (unsigned int)-1;
8005
8006 filp->private_data = info;
8007
8008 tr->trace_ref++;
8009
8010 mutex_unlock(&trace_types_lock);
8011
8012 ret = nonseekable_open(inode, filp);
8013 if (ret < 0)
8014 trace_array_put(tr);
8015
8016 return ret;
8017 }
8018
8019 static __poll_t
8020 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8021 {
8022 struct ftrace_buffer_info *info = filp->private_data;
8023 struct trace_iterator *iter = &info->iter;
8024
8025 return trace_poll(iter, filp, poll_table);
8026 }
8027
8028 static ssize_t
8029 tracing_buffers_read(struct file *filp, char __user *ubuf,
8030 size_t count, loff_t *ppos)
8031 {
8032 struct ftrace_buffer_info *info = filp->private_data;
8033 struct trace_iterator *iter = &info->iter;
8034 ssize_t ret = 0;
8035 ssize_t size;
8036
8037 if (!count)
8038 return 0;
8039
8040 #ifdef CONFIG_TRACER_MAX_TRACE
8041 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8042 return -EBUSY;
8043 #endif
8044
8045 if (!info->spare) {
8046 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8047 iter->cpu_file);
8048 if (IS_ERR(info->spare)) {
8049 ret = PTR_ERR(info->spare);
8050 info->spare = NULL;
8051 } else {
8052 info->spare_cpu = iter->cpu_file;
8053 }
8054 }
8055 if (!info->spare)
8056 return ret;
8057
8058 /* Do we have previous read data to read? */
8059 if (info->read < PAGE_SIZE)
8060 goto read;
8061
8062 again:
8063 trace_access_lock(iter->cpu_file);
8064 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8065 &info->spare,
8066 count,
8067 iter->cpu_file, 0);
8068 trace_access_unlock(iter->cpu_file);
8069
8070 if (ret < 0) {
8071 if (trace_empty(iter)) {
8072 if ((filp->f_flags & O_NONBLOCK))
8073 return -EAGAIN;
8074
8075 ret = wait_on_pipe(iter, 0);
8076 if (ret)
8077 return ret;
8078
8079 goto again;
8080 }
8081 return 0;
8082 }
8083
8084 info->read = 0;
8085 read:
8086 size = PAGE_SIZE - info->read;
8087 if (size > count)
8088 size = count;
8089
8090 ret = copy_to_user(ubuf, info->spare + info->read, size);
8091 if (ret == size)
8092 return -EFAULT;
8093
8094 size -= ret;
8095
8096 *ppos += size;
8097 info->read += size;
8098
8099 return size;
8100 }
8101
8102 static int tracing_buffers_release(struct inode *inode, struct file *file)
8103 {
8104 struct ftrace_buffer_info *info = file->private_data;
8105 struct trace_iterator *iter = &info->iter;
8106
8107 mutex_lock(&trace_types_lock);
8108
8109 iter->tr->trace_ref--;
8110
8111 __trace_array_put(iter->tr);
8112
8113 if (info->spare)
8114 ring_buffer_free_read_page(iter->array_buffer->buffer,
8115 info->spare_cpu, info->spare);
8116 kvfree(info);
8117
8118 mutex_unlock(&trace_types_lock);
8119
8120 return 0;
8121 }
8122
8123 struct buffer_ref {
8124 struct trace_buffer *buffer;
8125 void *page;
8126 int cpu;
8127 refcount_t refcount;
8128 };
8129
8130 static void buffer_ref_release(struct buffer_ref *ref)
8131 {
8132 if (!refcount_dec_and_test(&ref->refcount))
8133 return;
8134 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8135 kfree(ref);
8136 }
8137
8138 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8139 struct pipe_buffer *buf)
8140 {
8141 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8142
8143 buffer_ref_release(ref);
8144 buf->private = 0;
8145 }
8146
8147 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8148 struct pipe_buffer *buf)
8149 {
8150 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8151
8152 if (refcount_read(&ref->refcount) > INT_MAX/2)
8153 return false;
8154
8155 refcount_inc(&ref->refcount);
8156 return true;
8157 }
8158
8159 /* Pipe buffer operations for a buffer. */
8160 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8161 .release = buffer_pipe_buf_release,
8162 .get = buffer_pipe_buf_get,
8163 };
8164
8165 /*
8166 * Callback from splice_to_pipe(), if we need to release some pages
8167 * at the end of the spd in case we errored out while filling the pipe.
8168 */
8169 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8170 {
8171 struct buffer_ref *ref =
8172 (struct buffer_ref *)spd->partial[i].private;
8173
8174 buffer_ref_release(ref);
8175 spd->partial[i].private = 0;
8176 }
8177
8178 static ssize_t
8179 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8180 struct pipe_inode_info *pipe, size_t len,
8181 unsigned int flags)
8182 {
8183 struct ftrace_buffer_info *info = file->private_data;
8184 struct trace_iterator *iter = &info->iter;
8185 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8186 struct page *pages_def[PIPE_DEF_BUFFERS];
8187 struct splice_pipe_desc spd = {
8188 .pages = pages_def,
8189 .partial = partial_def,
8190 .nr_pages_max = PIPE_DEF_BUFFERS,
8191 .ops = &buffer_pipe_buf_ops,
8192 .spd_release = buffer_spd_release,
8193 };
8194 struct buffer_ref *ref;
8195 int entries, i;
8196 ssize_t ret = 0;
8197
8198 #ifdef CONFIG_TRACER_MAX_TRACE
8199 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8200 return -EBUSY;
8201 #endif
8202
8203 if (*ppos & (PAGE_SIZE - 1))
8204 return -EINVAL;
8205
8206 if (len & (PAGE_SIZE - 1)) {
8207 if (len < PAGE_SIZE)
8208 return -EINVAL;
8209 len &= PAGE_MASK;
8210 }
8211
8212 if (splice_grow_spd(pipe, &spd))
8213 return -ENOMEM;
8214
8215 again:
8216 trace_access_lock(iter->cpu_file);
8217 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8218
8219 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8220 struct page *page;
8221 int r;
8222
8223 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8224 if (!ref) {
8225 ret = -ENOMEM;
8226 break;
8227 }
8228
8229 refcount_set(&ref->refcount, 1);
8230 ref->buffer = iter->array_buffer->buffer;
8231 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8232 if (IS_ERR(ref->page)) {
8233 ret = PTR_ERR(ref->page);
8234 ref->page = NULL;
8235 kfree(ref);
8236 break;
8237 }
8238 ref->cpu = iter->cpu_file;
8239
8240 r = ring_buffer_read_page(ref->buffer, &ref->page,
8241 len, iter->cpu_file, 1);
8242 if (r < 0) {
8243 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8244 ref->page);
8245 kfree(ref);
8246 break;
8247 }
8248
8249 page = virt_to_page(ref->page);
8250
8251 spd.pages[i] = page;
8252 spd.partial[i].len = PAGE_SIZE;
8253 spd.partial[i].offset = 0;
8254 spd.partial[i].private = (unsigned long)ref;
8255 spd.nr_pages++;
8256 *ppos += PAGE_SIZE;
8257
8258 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8259 }
8260
8261 trace_access_unlock(iter->cpu_file);
8262 spd.nr_pages = i;
8263
8264 /* did we read anything? */
8265 if (!spd.nr_pages) {
8266 if (ret)
8267 goto out;
8268
8269 ret = -EAGAIN;
8270 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8271 goto out;
8272
8273 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8274 if (ret)
8275 goto out;
8276
8277 goto again;
8278 }
8279
8280 ret = splice_to_pipe(pipe, &spd);
8281 out:
8282 splice_shrink_spd(&spd);
8283
8284 return ret;
8285 }
8286
8287 static const struct file_operations tracing_buffers_fops = {
8288 .open = tracing_buffers_open,
8289 .read = tracing_buffers_read,
8290 .poll = tracing_buffers_poll,
8291 .release = tracing_buffers_release,
8292 .splice_read = tracing_buffers_splice_read,
8293 .llseek = no_llseek,
8294 };
8295
8296 static ssize_t
8297 tracing_stats_read(struct file *filp, char __user *ubuf,
8298 size_t count, loff_t *ppos)
8299 {
8300 struct inode *inode = file_inode(filp);
8301 struct trace_array *tr = inode->i_private;
8302 struct array_buffer *trace_buf = &tr->array_buffer;
8303 int cpu = tracing_get_cpu(inode);
8304 struct trace_seq *s;
8305 unsigned long cnt;
8306 unsigned long long t;
8307 unsigned long usec_rem;
8308
8309 s = kmalloc(sizeof(*s), GFP_KERNEL);
8310 if (!s)
8311 return -ENOMEM;
8312
8313 trace_seq_init(s);
8314
8315 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8316 trace_seq_printf(s, "entries: %ld\n", cnt);
8317
8318 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8319 trace_seq_printf(s, "overrun: %ld\n", cnt);
8320
8321 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8322 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8323
8324 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8325 trace_seq_printf(s, "bytes: %ld\n", cnt);
8326
8327 if (trace_clocks[tr->clock_id].in_ns) {
8328 /* local or global for trace_clock */
8329 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8330 usec_rem = do_div(t, USEC_PER_SEC);
8331 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8332 t, usec_rem);
8333
8334 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8335 usec_rem = do_div(t, USEC_PER_SEC);
8336 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8337 } else {
8338 /* counter or tsc mode for trace_clock */
8339 trace_seq_printf(s, "oldest event ts: %llu\n",
8340 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8341
8342 trace_seq_printf(s, "now ts: %llu\n",
8343 ring_buffer_time_stamp(trace_buf->buffer));
8344 }
8345
8346 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8347 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8348
8349 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8350 trace_seq_printf(s, "read events: %ld\n", cnt);
8351
8352 count = simple_read_from_buffer(ubuf, count, ppos,
8353 s->buffer, trace_seq_used(s));
8354
8355 kfree(s);
8356
8357 return count;
8358 }
8359
8360 static const struct file_operations tracing_stats_fops = {
8361 .open = tracing_open_generic_tr,
8362 .read = tracing_stats_read,
8363 .llseek = generic_file_llseek,
8364 .release = tracing_release_generic_tr,
8365 };
8366
8367 #ifdef CONFIG_DYNAMIC_FTRACE
8368
8369 static ssize_t
8370 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8371 size_t cnt, loff_t *ppos)
8372 {
8373 ssize_t ret;
8374 char *buf;
8375 int r;
8376
8377 /* 256 should be plenty to hold the amount needed */
8378 buf = kmalloc(256, GFP_KERNEL);
8379 if (!buf)
8380 return -ENOMEM;
8381
8382 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8383 ftrace_update_tot_cnt,
8384 ftrace_number_of_pages,
8385 ftrace_number_of_groups);
8386
8387 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8388 kfree(buf);
8389 return ret;
8390 }
8391
8392 static const struct file_operations tracing_dyn_info_fops = {
8393 .open = tracing_open_generic,
8394 .read = tracing_read_dyn_info,
8395 .llseek = generic_file_llseek,
8396 };
8397 #endif /* CONFIG_DYNAMIC_FTRACE */
8398
8399 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8400 static void
8401 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8402 struct trace_array *tr, struct ftrace_probe_ops *ops,
8403 void *data)
8404 {
8405 tracing_snapshot_instance(tr);
8406 }
8407
8408 static void
8409 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8410 struct trace_array *tr, struct ftrace_probe_ops *ops,
8411 void *data)
8412 {
8413 struct ftrace_func_mapper *mapper = data;
8414 long *count = NULL;
8415
8416 if (mapper)
8417 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8418
8419 if (count) {
8420
8421 if (*count <= 0)
8422 return;
8423
8424 (*count)--;
8425 }
8426
8427 tracing_snapshot_instance(tr);
8428 }
8429
8430 static int
8431 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8432 struct ftrace_probe_ops *ops, void *data)
8433 {
8434 struct ftrace_func_mapper *mapper = data;
8435 long *count = NULL;
8436
8437 seq_printf(m, "%ps:", (void *)ip);
8438
8439 seq_puts(m, "snapshot");
8440
8441 if (mapper)
8442 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8443
8444 if (count)
8445 seq_printf(m, ":count=%ld\n", *count);
8446 else
8447 seq_puts(m, ":unlimited\n");
8448
8449 return 0;
8450 }
8451
8452 static int
8453 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8454 unsigned long ip, void *init_data, void **data)
8455 {
8456 struct ftrace_func_mapper *mapper = *data;
8457
8458 if (!mapper) {
8459 mapper = allocate_ftrace_func_mapper();
8460 if (!mapper)
8461 return -ENOMEM;
8462 *data = mapper;
8463 }
8464
8465 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8466 }
8467
8468 static void
8469 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8470 unsigned long ip, void *data)
8471 {
8472 struct ftrace_func_mapper *mapper = data;
8473
8474 if (!ip) {
8475 if (!mapper)
8476 return;
8477 free_ftrace_func_mapper(mapper, NULL);
8478 return;
8479 }
8480
8481 ftrace_func_mapper_remove_ip(mapper, ip);
8482 }
8483
8484 static struct ftrace_probe_ops snapshot_probe_ops = {
8485 .func = ftrace_snapshot,
8486 .print = ftrace_snapshot_print,
8487 };
8488
8489 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8490 .func = ftrace_count_snapshot,
8491 .print = ftrace_snapshot_print,
8492 .init = ftrace_snapshot_init,
8493 .free = ftrace_snapshot_free,
8494 };
8495
8496 static int
8497 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8498 char *glob, char *cmd, char *param, int enable)
8499 {
8500 struct ftrace_probe_ops *ops;
8501 void *count = (void *)-1;
8502 char *number;
8503 int ret;
8504
8505 if (!tr)
8506 return -ENODEV;
8507
8508 /* hash funcs only work with set_ftrace_filter */
8509 if (!enable)
8510 return -EINVAL;
8511
8512 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8513
8514 if (glob[0] == '!')
8515 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8516
8517 if (!param)
8518 goto out_reg;
8519
8520 number = strsep(&param, ":");
8521
8522 if (!strlen(number))
8523 goto out_reg;
8524
8525 /*
8526 * We use the callback data field (which is a pointer)
8527 * as our counter.
8528 */
8529 ret = kstrtoul(number, 0, (unsigned long *)&count);
8530 if (ret)
8531 return ret;
8532
8533 out_reg:
8534 ret = tracing_alloc_snapshot_instance(tr);
8535 if (ret < 0)
8536 goto out;
8537
8538 ret = register_ftrace_function_probe(glob, tr, ops, count);
8539
8540 out:
8541 return ret < 0 ? ret : 0;
8542 }
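/*
 * A minimal user-space sketch of the command string this callback parses.
 * It assumes tracefs is mounted at /sys/kernel/tracing; the traced function
 * ("schedule") and the count (3) are arbitrary examples.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/set_ftrace_filter", "w");

	if (!f)
		return 1;
	/* Take a snapshot on each of the next 3 hits of schedule(). */
	fprintf(f, "schedule:snapshot:3\n");
	/* Writing "!schedule:snapshot" later would unregister the probe. */
	return fclose(f) ? 1 : 0;
}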
8543
8544 static struct ftrace_func_command ftrace_snapshot_cmd = {
8545 .name = "snapshot",
8546 .func = ftrace_trace_snapshot_callback,
8547 };
8548
8549 static __init int register_snapshot_cmd(void)
8550 {
8551 return register_ftrace_command(&ftrace_snapshot_cmd);
8552 }
8553 #else
8554 static inline __init int register_snapshot_cmd(void) { return 0; }
8555 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8556
8557 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8558 {
8559 if (WARN_ON(!tr->dir))
8560 return ERR_PTR(-ENODEV);
8561
8562 /* Top directory uses NULL as the parent */
8563 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8564 return NULL;
8565
8566 /* All sub buffers have a descriptor */
8567 return tr->dir;
8568 }
8569
8570 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8571 {
8572 struct dentry *d_tracer;
8573
8574 if (tr->percpu_dir)
8575 return tr->percpu_dir;
8576
8577 d_tracer = tracing_get_dentry(tr);
8578 if (IS_ERR(d_tracer))
8579 return NULL;
8580
8581 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8582
8583 MEM_FAIL(!tr->percpu_dir,
8584 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8585
8586 return tr->percpu_dir;
8587 }
8588
8589 static struct dentry *
8590 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8591 void *data, long cpu, const struct file_operations *fops)
8592 {
8593 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8594
8595 if (ret) /* See tracing_get_cpu() */
8596 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8597 return ret;
8598 }
8599
8600 static void
8601 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8602 {
8603 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8604 struct dentry *d_cpu;
8605 char cpu_dir[30]; /* 30 characters should be more than enough */
8606
8607 if (!d_percpu)
8608 return;
8609
8610 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8611 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8612 if (!d_cpu) {
8613 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8614 return;
8615 }
8616
8617 /* per cpu trace_pipe */
8618 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8619 tr, cpu, &tracing_pipe_fops);
8620
8621 /* per cpu trace */
8622 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8623 tr, cpu, &tracing_fops);
8624
8625 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8626 tr, cpu, &tracing_buffers_fops);
8627
8628 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8629 tr, cpu, &tracing_stats_fops);
8630
8631 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8632 tr, cpu, &tracing_entries_fops);
8633
8634 #ifdef CONFIG_TRACER_SNAPSHOT
8635 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8636 tr, cpu, &snapshot_fops);
8637
8638 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8639 tr, cpu, &snapshot_raw_fops);
8640 #endif
8641 }
8642
8643 #ifdef CONFIG_FTRACE_SELFTEST
8644 /* Let selftest have access to static functions in this file */
8645 #include "trace_selftest.c"
8646 #endif
8647
8648 static ssize_t
8649 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8650 loff_t *ppos)
8651 {
8652 struct trace_option_dentry *topt = filp->private_data;
8653 char *buf;
8654
8655 if (topt->flags->val & topt->opt->bit)
8656 buf = "1\n";
8657 else
8658 buf = "0\n";
8659
8660 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8661 }
8662
8663 static ssize_t
8664 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8665 loff_t *ppos)
8666 {
8667 struct trace_option_dentry *topt = filp->private_data;
8668 unsigned long val;
8669 int ret;
8670
8671 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8672 if (ret)
8673 return ret;
8674
8675 if (val != 0 && val != 1)
8676 return -EINVAL;
8677
8678 if (!!(topt->flags->val & topt->opt->bit) != val) {
8679 mutex_lock(&trace_types_lock);
8680 ret = __set_tracer_option(topt->tr, topt->flags,
8681 topt->opt, !val);
8682 mutex_unlock(&trace_types_lock);
8683 if (ret)
8684 return ret;
8685 }
8686
8687 *ppos += cnt;
8688
8689 return cnt;
8690 }
8691
8692
8693 static const struct file_operations trace_options_fops = {
8694 .open = tracing_open_generic,
8695 .read = trace_options_read,
8696 .write = trace_options_write,
8697 .llseek = generic_file_llseek,
8698 };
8699
8700 /*
8701 * In order to pass in both the trace_array descriptor as well as the index
8702 * to the flag that the trace option file represents, the trace_array
8703 * has a character array of trace_flags_index[], which holds the index
8704 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8705 * The address of this character array is passed to the flag option file
8706 * read/write callbacks.
8707 *
8708 * In order to extract both the index and the trace_array descriptor,
8709 * get_tr_index() uses the following algorithm.
8710 *
8711 * idx = *ptr;
8712 *
8713 * This works because the pointer is the address of an element of the
8714 * index array, and each element stores its own position (index[1] == 1).
8715 *
8716 * To get back to the trace_array descriptor, subtract that index from
8717 * the pointer, which lands on the start of the index array itself:
8718 *
8719 * ptr - idx == &index[0]
8720 *
8721 * A simple container_of() on that pointer then yields the
8722 * trace_array descriptor.
8723 */
8724 static void get_tr_index(void *data, struct trace_array **ptr,
8725 unsigned int *pindex)
8726 {
8727 *pindex = *(unsigned char *)data;
8728
8729 *ptr = container_of(data - *pindex, struct trace_array,
8730 trace_flags_index);
8731 }
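/*
 * A minimal sketch of the round trip described above. The helper name is
 * hypothetical and nothing calls it; it only illustrates that the address
 * of index[i] is enough to recover both i and the enclosing trace_array.
 */
static inline void example_tr_index_round_trip(struct trace_array *tr)
{
	void *data = &tr->trace_flags_index[3];	/* what the option file stores */
	struct trace_array *found;
	unsigned int idx;

	get_tr_index(data, &found, &idx);
	WARN_ON(idx != 3 || found != tr);	/* idx == 3, found == tr */
}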
8732
8733 static ssize_t
8734 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8735 loff_t *ppos)
8736 {
8737 void *tr_index = filp->private_data;
8738 struct trace_array *tr;
8739 unsigned int index;
8740 char *buf;
8741
8742 get_tr_index(tr_index, &tr, &index);
8743
8744 if (tr->trace_flags & (1 << index))
8745 buf = "1\n";
8746 else
8747 buf = "0\n";
8748
8749 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8750 }
8751
8752 static ssize_t
8753 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8754 loff_t *ppos)
8755 {
8756 void *tr_index = filp->private_data;
8757 struct trace_array *tr;
8758 unsigned int index;
8759 unsigned long val;
8760 int ret;
8761
8762 get_tr_index(tr_index, &tr, &index);
8763
8764 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8765 if (ret)
8766 return ret;
8767
8768 if (val != 0 && val != 1)
8769 return -EINVAL;
8770
8771 mutex_lock(&event_mutex);
8772 mutex_lock(&trace_types_lock);
8773 ret = set_tracer_flag(tr, 1 << index, val);
8774 mutex_unlock(&trace_types_lock);
8775 mutex_unlock(&event_mutex);
8776
8777 if (ret < 0)
8778 return ret;
8779
8780 *ppos += cnt;
8781
8782 return cnt;
8783 }
8784
8785 static const struct file_operations trace_options_core_fops = {
8786 .open = tracing_open_generic,
8787 .read = trace_options_core_read,
8788 .write = trace_options_core_write,
8789 .llseek = generic_file_llseek,
8790 };
8791
8792 struct dentry *trace_create_file(const char *name,
8793 umode_t mode,
8794 struct dentry *parent,
8795 void *data,
8796 const struct file_operations *fops)
8797 {
8798 struct dentry *ret;
8799
8800 ret = tracefs_create_file(name, mode, parent, data, fops);
8801 if (!ret)
8802 pr_warn("Could not create tracefs '%s' entry\n", name);
8803
8804 return ret;
8805 }
8806
8807
8808 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8809 {
8810 struct dentry *d_tracer;
8811
8812 if (tr->options)
8813 return tr->options;
8814
8815 d_tracer = tracing_get_dentry(tr);
8816 if (IS_ERR(d_tracer))
8817 return NULL;
8818
8819 tr->options = tracefs_create_dir("options", d_tracer);
8820 if (!tr->options) {
8821 pr_warn("Could not create tracefs directory 'options'\n");
8822 return NULL;
8823 }
8824
8825 return tr->options;
8826 }
8827
8828 static void
8829 create_trace_option_file(struct trace_array *tr,
8830 struct trace_option_dentry *topt,
8831 struct tracer_flags *flags,
8832 struct tracer_opt *opt)
8833 {
8834 struct dentry *t_options;
8835
8836 t_options = trace_options_init_dentry(tr);
8837 if (!t_options)
8838 return;
8839
8840 topt->flags = flags;
8841 topt->opt = opt;
8842 topt->tr = tr;
8843
8844 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8845 t_options, topt, &trace_options_fops);
8846
8847 }
8848
8849 static void
8850 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8851 {
8852 struct trace_option_dentry *topts;
8853 struct trace_options *tr_topts;
8854 struct tracer_flags *flags;
8855 struct tracer_opt *opts;
8856 int cnt;
8857 int i;
8858
8859 if (!tracer)
8860 return;
8861
8862 flags = tracer->flags;
8863
8864 if (!flags || !flags->opts)
8865 return;
8866
8867 /*
8868 * If this is an instance, only create flags for tracers
8869 * the instance may have.
8870 */
8871 if (!trace_ok_for_array(tracer, tr))
8872 return;
8873
8874 for (i = 0; i < tr->nr_topts; i++) {
8875 /* Make sure there are no duplicate flags. */
8876 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8877 return;
8878 }
8879
8880 opts = flags->opts;
8881
8882 for (cnt = 0; opts[cnt].name; cnt++)
8883 ;
8884
8885 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8886 if (!topts)
8887 return;
8888
8889 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8890 GFP_KERNEL);
8891 if (!tr_topts) {
8892 kfree(topts);
8893 return;
8894 }
8895
8896 tr->topts = tr_topts;
8897 tr->topts[tr->nr_topts].tracer = tracer;
8898 tr->topts[tr->nr_topts].topts = topts;
8899 tr->nr_topts++;
8900
8901 for (cnt = 0; opts[cnt].name; cnt++) {
8902 create_trace_option_file(tr, &topts[cnt], flags,
8903 &opts[cnt]);
8904 MEM_FAIL(topts[cnt].entry == NULL,
8905 "Failed to create trace option: %s",
8906 opts[cnt].name);
8907 }
8908 }
8909
8910 static struct dentry *
8911 create_trace_option_core_file(struct trace_array *tr,
8912 const char *option, long index)
8913 {
8914 struct dentry *t_options;
8915
8916 t_options = trace_options_init_dentry(tr);
8917 if (!t_options)
8918 return NULL;
8919
8920 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8921 (void *)&tr->trace_flags_index[index],
8922 &trace_options_core_fops);
8923 }
8924
8925 static void create_trace_options_dir(struct trace_array *tr)
8926 {
8927 struct dentry *t_options;
8928 bool top_level = tr == &global_trace;
8929 int i;
8930
8931 t_options = trace_options_init_dentry(tr);
8932 if (!t_options)
8933 return;
8934
8935 for (i = 0; trace_options[i]; i++) {
8936 if (top_level ||
8937 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8938 create_trace_option_core_file(tr, trace_options[i], i);
8939 }
8940 }
8941
8942 static ssize_t
8943 rb_simple_read(struct file *filp, char __user *ubuf,
8944 size_t cnt, loff_t *ppos)
8945 {
8946 struct trace_array *tr = filp->private_data;
8947 char buf[64];
8948 int r;
8949
8950 r = tracer_tracing_is_on(tr);
8951 r = sprintf(buf, "%d\n", r);
8952
8953 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8954 }
8955
8956 static ssize_t
8957 rb_simple_write(struct file *filp, const char __user *ubuf,
8958 size_t cnt, loff_t *ppos)
8959 {
8960 struct trace_array *tr = filp->private_data;
8961 struct trace_buffer *buffer = tr->array_buffer.buffer;
8962 unsigned long val;
8963 int ret;
8964
8965 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8966 if (ret)
8967 return ret;
8968
8969 if (buffer) {
8970 mutex_lock(&trace_types_lock);
8971 if (!!val == tracer_tracing_is_on(tr)) {
8972 val = 0; /* do nothing */
8973 } else if (val) {
8974 tracer_tracing_on(tr);
8975 if (tr->current_trace->start)
8976 tr->current_trace->start(tr);
8977 } else {
8978 tracer_tracing_off(tr);
8979 if (tr->current_trace->stop)
8980 tr->current_trace->stop(tr);
8981 }
8982 mutex_unlock(&trace_types_lock);
8983 }
8984
8985 (*ppos)++;
8986
8987 return cnt;
8988 }
8989
8990 static const struct file_operations rb_simple_fops = {
8991 .open = tracing_open_generic_tr,
8992 .read = rb_simple_read,
8993 .write = rb_simple_write,
8994 .release = tracing_release_generic_tr,
8995 .llseek = default_llseek,
8996 };
8997
8998 static ssize_t
8999 buffer_percent_read(struct file *filp, char __user *ubuf,
9000 size_t cnt, loff_t *ppos)
9001 {
9002 struct trace_array *tr = filp->private_data;
9003 char buf[64];
9004 int r;
9005
9006 r = tr->buffer_percent;
9007 r = sprintf(buf, "%d\n", r);
9008
9009 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9010 }
9011
9012 static ssize_t
9013 buffer_percent_write(struct file *filp, const char __user *ubuf,
9014 size_t cnt, loff_t *ppos)
9015 {
9016 struct trace_array *tr = filp->private_data;
9017 unsigned long val;
9018 int ret;
9019
9020 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9021 if (ret)
9022 return ret;
9023
9024 if (val > 100)
9025 return -EINVAL;
9026
9027 if (!val)
9028 val = 1;
9029
9030 tr->buffer_percent = val;
9031
9032 (*ppos)++;
9033
9034 return cnt;
9035 }
9036
9037 static const struct file_operations buffer_percent_fops = {
9038 .open = tracing_open_generic_tr,
9039 .read = buffer_percent_read,
9040 .write = buffer_percent_write,
9041 .release = tracing_release_generic_tr,
9042 .llseek = default_llseek,
9043 };
9044
9045 static struct dentry *trace_instance_dir;
9046
9047 static void
9048 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9049
9050 static int
9051 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9052 {
9053 enum ring_buffer_flags rb_flags;
9054
9055 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9056
9057 buf->tr = tr;
9058
9059 buf->buffer = ring_buffer_alloc(size, rb_flags);
9060 if (!buf->buffer)
9061 return -ENOMEM;
9062
9063 buf->data = alloc_percpu(struct trace_array_cpu);
9064 if (!buf->data) {
9065 ring_buffer_free(buf->buffer);
9066 buf->buffer = NULL;
9067 return -ENOMEM;
9068 }
9069
9070 /* Allocate the first page for all buffers */
9071 set_buffer_entries(&tr->array_buffer,
9072 ring_buffer_size(tr->array_buffer.buffer, 0));
9073
9074 return 0;
9075 }
9076
9077 static int allocate_trace_buffers(struct trace_array *tr, int size)
9078 {
9079 int ret;
9080
9081 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9082 if (ret)
9083 return ret;
9084
9085 #ifdef CONFIG_TRACER_MAX_TRACE
9086 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9087 allocate_snapshot ? size : 1);
9088 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9089 ring_buffer_free(tr->array_buffer.buffer);
9090 tr->array_buffer.buffer = NULL;
9091 free_percpu(tr->array_buffer.data);
9092 tr->array_buffer.data = NULL;
9093 return -ENOMEM;
9094 }
9095 tr->allocated_snapshot = allocate_snapshot;
9096
9097 /*
9098 * Only the top level trace array gets its snapshot allocated
9099 * from the kernel command line.
9100 */
9101 allocate_snapshot = false;
9102 #endif
9103
9104 return 0;
9105 }
9106
9107 static void free_trace_buffer(struct array_buffer *buf)
9108 {
9109 if (buf->buffer) {
9110 ring_buffer_free(buf->buffer);
9111 buf->buffer = NULL;
9112 free_percpu(buf->data);
9113 buf->data = NULL;
9114 }
9115 }
9116
9117 static void free_trace_buffers(struct trace_array *tr)
9118 {
9119 if (!tr)
9120 return;
9121
9122 free_trace_buffer(&tr->array_buffer);
9123
9124 #ifdef CONFIG_TRACER_MAX_TRACE
9125 free_trace_buffer(&tr->max_buffer);
9126 #endif
9127 }
9128
9129 static void init_trace_flags_index(struct trace_array *tr)
9130 {
9131 int i;
9132
9133 /* Used by the trace options files */
9134 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9135 tr->trace_flags_index[i] = i;
9136 }
9137
9138 static void __update_tracer_options(struct trace_array *tr)
9139 {
9140 struct tracer *t;
9141
9142 for (t = trace_types; t; t = t->next)
9143 add_tracer_options(tr, t);
9144 }
9145
9146 static void update_tracer_options(struct trace_array *tr)
9147 {
9148 mutex_lock(&trace_types_lock);
9149 __update_tracer_options(tr);
9150 mutex_unlock(&trace_types_lock);
9151 }
9152
9153 /* Must have trace_types_lock held */
9154 struct trace_array *trace_array_find(const char *instance)
9155 {
9156 struct trace_array *tr, *found = NULL;
9157
9158 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9159 if (tr->name && strcmp(tr->name, instance) == 0) {
9160 found = tr;
9161 break;
9162 }
9163 }
9164
9165 return found;
9166 }
9167
9168 struct trace_array *trace_array_find_get(const char *instance)
9169 {
9170 struct trace_array *tr;
9171
9172 mutex_lock(&trace_types_lock);
9173 tr = trace_array_find(instance);
9174 if (tr)
9175 tr->ref++;
9176 mutex_unlock(&trace_types_lock);
9177
9178 return tr;
9179 }
9180
9181 static int trace_array_create_dir(struct trace_array *tr)
9182 {
9183 int ret;
9184
9185 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9186 if (!tr->dir)
9187 return -EINVAL;
9188
9189 ret = event_trace_add_tracer(tr->dir, tr);
9190 if (ret) {
9191 tracefs_remove(tr->dir);
9192 return ret;
9193 }
9194
9195 init_tracer_tracefs(tr, tr->dir);
9196 __update_tracer_options(tr);
9197
9198 return ret;
9199 }
9200
9201 static struct trace_array *trace_array_create(const char *name)
9202 {
9203 struct trace_array *tr;
9204 int ret;
9205
9206 ret = -ENOMEM;
9207 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9208 if (!tr)
9209 return ERR_PTR(ret);
9210
9211 tr->name = kstrdup(name, GFP_KERNEL);
9212 if (!tr->name)
9213 goto out_free_tr;
9214
9215 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9216 goto out_free_tr;
9217
9218 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9219
9220 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9221
9222 raw_spin_lock_init(&tr->start_lock);
9223
9224 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9225
9226 tr->current_trace = &nop_trace;
9227
9228 INIT_LIST_HEAD(&tr->systems);
9229 INIT_LIST_HEAD(&tr->events);
9230 INIT_LIST_HEAD(&tr->hist_vars);
9231 INIT_LIST_HEAD(&tr->err_log);
9232
9233 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9234 goto out_free_tr;
9235
9236 if (ftrace_allocate_ftrace_ops(tr) < 0)
9237 goto out_free_tr;
9238
9239 ftrace_init_trace_array(tr);
9240
9241 init_trace_flags_index(tr);
9242
9243 if (trace_instance_dir) {
9244 ret = trace_array_create_dir(tr);
9245 if (ret)
9246 goto out_free_tr;
9247 } else
9248 __trace_early_add_events(tr);
9249
9250 list_add(&tr->list, &ftrace_trace_arrays);
9251
9252 tr->ref++;
9253
9254 return tr;
9255
9256 out_free_tr:
9257 ftrace_free_ftrace_ops(tr);
9258 free_trace_buffers(tr);
9259 free_cpumask_var(tr->tracing_cpumask);
9260 kfree(tr->name);
9261 kfree(tr);
9262
9263 return ERR_PTR(ret);
9264 }
9265
9266 static int instance_mkdir(const char *name)
9267 {
9268 struct trace_array *tr;
9269 int ret;
9270
9271 mutex_lock(&event_mutex);
9272 mutex_lock(&trace_types_lock);
9273
9274 ret = -EEXIST;
9275 if (trace_array_find(name))
9276 goto out_unlock;
9277
9278 tr = trace_array_create(name);
9279
9280 ret = PTR_ERR_OR_ZERO(tr);
9281
9282 out_unlock:
9283 mutex_unlock(&trace_types_lock);
9284 mutex_unlock(&event_mutex);
9285 return ret;
9286 }
9287
9288 /**
9289 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9290 * @name: The name of the trace array to be looked up/created.
9291 *
9292 * Returns a pointer to the trace array with the given name, or
9293 * NULL if it cannot be created.
9294 *
9295 * NOTE: This function increments the reference counter associated with the
9296 * trace array returned. This makes sure it cannot be freed while in use.
9297 * Use trace_array_put() once the trace array is no longer needed.
9298 * If the trace_array is to be freed, trace_array_destroy() needs to
9299 * be called after the trace_array_put(), or simply let user space delete
9300 * it from the tracefs instances directory. But until the
9301 * trace_array_put() is called, user space cannot delete it.
9302 *
9303 */
9304 struct trace_array *trace_array_get_by_name(const char *name)
9305 {
9306 struct trace_array *tr;
9307
9308 mutex_lock(&event_mutex);
9309 mutex_lock(&trace_types_lock);
9310
9311 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9312 if (tr->name && strcmp(tr->name, name) == 0)
9313 goto out_unlock;
9314 }
9315
9316 tr = trace_array_create(name);
9317
9318 if (IS_ERR(tr))
9319 tr = NULL;
9320 out_unlock:
9321 if (tr)
9322 tr->ref++;
9323
9324 mutex_unlock(&trace_types_lock);
9325 mutex_unlock(&event_mutex);
9326 return tr;
9327 }
9328 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
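/*
 * A minimal sketch of the lifecycle described in the kerneldoc above, as it
 * might appear in a module. The instance name "my_instance" and the helper
 * names are hypothetical.
 */
static struct trace_array *my_tr;

static int example_instance_setup(void)
{
	/* Creates the instance if it does not exist yet; takes a reference. */
	my_tr = trace_array_get_by_name("my_instance");
	if (!my_tr)
		return -ENOMEM;
	return 0;
}

static void example_instance_teardown(void)
{
	/* Drop the reference first, then request removal of the instance. */
	trace_array_put(my_tr);
	if (trace_array_destroy(my_tr))
		pr_warn("my_instance busy or missing, not destroyed\n");
}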
9329
9330 static int __remove_instance(struct trace_array *tr)
9331 {
9332 int i;
9333
9334 /* Reference counter for a newly created trace array = 1. */
9335 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9336 return -EBUSY;
9337
9338 list_del(&tr->list);
9339
9340 /* Disable all the flags that were enabled coming in */
9341 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9342 if ((1 << i) & ZEROED_TRACE_FLAGS)
9343 set_tracer_flag(tr, 1 << i, 0);
9344 }
9345
9346 tracing_set_nop(tr);
9347 clear_ftrace_function_probes(tr);
9348 event_trace_del_tracer(tr);
9349 ftrace_clear_pids(tr);
9350 ftrace_destroy_function_files(tr);
9351 tracefs_remove(tr->dir);
9352 free_percpu(tr->last_func_repeats);
9353 free_trace_buffers(tr);
9354
9355 for (i = 0; i < tr->nr_topts; i++) {
9356 kfree(tr->topts[i].topts);
9357 }
9358 kfree(tr->topts);
9359
9360 free_cpumask_var(tr->tracing_cpumask);
9361 kfree(tr->name);
9362 kfree(tr);
9363
9364 return 0;
9365 }
9366
9367 int trace_array_destroy(struct trace_array *this_tr)
9368 {
9369 struct trace_array *tr;
9370 int ret;
9371
9372 if (!this_tr)
9373 return -EINVAL;
9374
9375 mutex_lock(&event_mutex);
9376 mutex_lock(&trace_types_lock);
9377
9378 ret = -ENODEV;
9379
9380 /* Make sure the trace array exists before destroying it. */
9381 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9382 if (tr == this_tr) {
9383 ret = __remove_instance(tr);
9384 break;
9385 }
9386 }
9387
9388 mutex_unlock(&trace_types_lock);
9389 mutex_unlock(&event_mutex);
9390
9391 return ret;
9392 }
9393 EXPORT_SYMBOL_GPL(trace_array_destroy);
9394
9395 static int instance_rmdir(const char *name)
9396 {
9397 struct trace_array *tr;
9398 int ret;
9399
9400 mutex_lock(&event_mutex);
9401 mutex_lock(&trace_types_lock);
9402
9403 ret = -ENODEV;
9404 tr = trace_array_find(name);
9405 if (tr)
9406 ret = __remove_instance(tr);
9407
9408 mutex_unlock(&trace_types_lock);
9409 mutex_unlock(&event_mutex);
9410
9411 return ret;
9412 }
9413
9414 static __init void create_trace_instances(struct dentry *d_tracer)
9415 {
9416 struct trace_array *tr;
9417
9418 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9419 instance_mkdir,
9420 instance_rmdir);
9421 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9422 return;
9423
9424 mutex_lock(&event_mutex);
9425 mutex_lock(&trace_types_lock);
9426
9427 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9428 if (!tr->name)
9429 continue;
9430 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9431 "Failed to create instance directory\n"))
9432 break;
9433 }
9434
9435 mutex_unlock(&trace_types_lock);
9436 mutex_unlock(&event_mutex);
9437 }
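/*
 * A minimal user-space sketch of the instances directory these callbacks
 * back. It assumes tracefs is mounted at /sys/kernel/tracing; the instance
 * name "demo" is arbitrary.
 */
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* Reaches instance_mkdir("demo") via tracefs. */
	if (mkdir("/sys/kernel/tracing/instances/demo", 0755))
		perror("mkdir");

	/* The new directory now has its own trace, trace_pipe, events/, ... */

	/* Reaches instance_rmdir("demo"); fails with EBUSY while in use. */
	if (rmdir("/sys/kernel/tracing/instances/demo"))
		perror("rmdir");
	return 0;
}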
9438
9439 static void
9440 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9441 {
9442 struct trace_event_file *file;
9443 int cpu;
9444
9445 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9446 tr, &show_traces_fops);
9447
9448 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9449 tr, &set_tracer_fops);
9450
9451 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9452 tr, &tracing_cpumask_fops);
9453
9454 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9455 tr, &tracing_iter_fops);
9456
9457 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9458 tr, &tracing_fops);
9459
9460 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9461 tr, &tracing_pipe_fops);
9462
9463 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9464 tr, &tracing_entries_fops);
9465
9466 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9467 tr, &tracing_total_entries_fops);
9468
9469 trace_create_file("free_buffer", 0200, d_tracer,
9470 tr, &tracing_free_buffer_fops);
9471
9472 trace_create_file("trace_marker", 0220, d_tracer,
9473 tr, &tracing_mark_fops);
9474
9475 file = __find_event_file(tr, "ftrace", "print");
9476 if (file && file->dir)
9477 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9478 file, &event_trigger_fops);
9479 tr->trace_marker_file = file;
9480
9481 trace_create_file("trace_marker_raw", 0220, d_tracer,
9482 tr, &tracing_mark_raw_fops);
9483
9484 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9485 &trace_clock_fops);
9486
9487 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9488 tr, &rb_simple_fops);
9489
9490 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9491 &trace_time_stamp_mode_fops);
9492
9493 tr->buffer_percent = 50;
9494
9495 trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9496 tr, &buffer_percent_fops);
9497
9498 create_trace_options_dir(tr);
9499
9500 trace_create_maxlat_file(tr, d_tracer);
9501
9502 if (ftrace_create_function_files(tr, d_tracer))
9503 MEM_FAIL(1, "Could not allocate function filter files");
9504
9505 #ifdef CONFIG_TRACER_SNAPSHOT
9506 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9507 tr, &snapshot_fops);
9508 #endif
9509
9510 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9511 tr, &tracing_err_log_fops);
9512
9513 for_each_tracing_cpu(cpu)
9514 tracing_init_tracefs_percpu(tr, cpu);
9515
9516 ftrace_init_tracefs(tr, d_tracer);
9517 }
9518
9519 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9520 {
9521 struct vfsmount *mnt;
9522 struct file_system_type *type;
9523
9524 /*
9525 * To maintain backward compatibility for tools that mount
9526 * debugfs to get to the tracing facility, tracefs is automatically
9527 * mounted to the debugfs/tracing directory.
9528 */
9529 type = get_fs_type("tracefs");
9530 if (!type)
9531 return NULL;
9532 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9533 put_filesystem(type);
9534 if (IS_ERR(mnt))
9535 return NULL;
9536 mntget(mnt);
9537
9538 return mnt;
9539 }
9540
9541 /**
9542 * tracing_init_dentry - initialize top level trace array
9543 *
9544 * This is called when creating files or directories in the tracing
9545 * directory. It is called via fs_initcall() by any of the boot up code
9546 * and returns 0 once the top level tracing directory has been set up.
9547 */
9548 int tracing_init_dentry(void)
9549 {
9550 struct trace_array *tr = &global_trace;
9551
9552 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9553 pr_warn("Tracing disabled due to lockdown\n");
9554 return -EPERM;
9555 }
9556
9557 /* The top level trace array uses NULL as parent */
9558 if (tr->dir)
9559 return 0;
9560
9561 if (WARN_ON(!tracefs_initialized()))
9562 return -ENODEV;
9563
9564 /*
9565 * As there may still be users that expect the tracing
9566 * files to exist in debugfs/tracing, we must automount
9567 * the tracefs file system there, so older tools still
9568 * work with the newer kernel.
9569 */
9570 tr->dir = debugfs_create_automount("tracing", NULL,
9571 trace_automount, NULL);
9572
9573 return 0;
9574 }
9575
9576 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9577 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9578
9579 static struct workqueue_struct *eval_map_wq __initdata;
9580 static struct work_struct eval_map_work __initdata;
9581
9582 static void __init eval_map_work_func(struct work_struct *work)
9583 {
9584 int len;
9585
9586 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9587 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9588 }
9589
9590 static int __init trace_eval_init(void)
9591 {
9592 INIT_WORK(&eval_map_work, eval_map_work_func);
9593
9594 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9595 if (!eval_map_wq) {
9596 pr_err("Unable to allocate eval_map_wq\n");
9597 /* Do work here */
9598 eval_map_work_func(&eval_map_work);
9599 return -ENOMEM;
9600 }
9601
9602 queue_work(eval_map_wq, &eval_map_work);
9603 return 0;
9604 }
9605
9606 static int __init trace_eval_sync(void)
9607 {
9608 /* Make sure the eval map updates are finished */
9609 if (eval_map_wq)
9610 destroy_workqueue(eval_map_wq);
9611 return 0;
9612 }
9613
9614 late_initcall_sync(trace_eval_sync);
9615
9616
9617 #ifdef CONFIG_MODULES
9618 static void trace_module_add_evals(struct module *mod)
9619 {
9620 if (!mod->num_trace_evals)
9621 return;
9622
9623 /*
9624 * Modules with bad taint do not have events created, so do
9625 * not bother with enums either.
9626 */
9627 if (trace_module_has_bad_taint(mod))
9628 return;
9629
9630 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9631 }
9632
9633 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9634 static void trace_module_remove_evals(struct module *mod)
9635 {
9636 union trace_eval_map_item *map;
9637 union trace_eval_map_item **last = &trace_eval_maps;
9638
9639 if (!mod->num_trace_evals)
9640 return;
9641
9642 mutex_lock(&trace_eval_mutex);
9643
9644 map = trace_eval_maps;
9645
9646 while (map) {
9647 if (map->head.mod == mod)
9648 break;
9649 map = trace_eval_jmp_to_tail(map);
9650 last = &map->tail.next;
9651 map = map->tail.next;
9652 }
9653 if (!map)
9654 goto out;
9655
9656 *last = trace_eval_jmp_to_tail(map)->tail.next;
9657 kfree(map);
9658 out:
9659 mutex_unlock(&trace_eval_mutex);
9660 }
9661 #else
9662 static inline void trace_module_remove_evals(struct module *mod) { }
9663 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9664
9665 static int trace_module_notify(struct notifier_block *self,
9666 unsigned long val, void *data)
9667 {
9668 struct module *mod = data;
9669
9670 switch (val) {
9671 case MODULE_STATE_COMING:
9672 trace_module_add_evals(mod);
9673 break;
9674 case MODULE_STATE_GOING:
9675 trace_module_remove_evals(mod);
9676 break;
9677 }
9678
9679 return NOTIFY_OK;
9680 }
9681
9682 static struct notifier_block trace_module_nb = {
9683 .notifier_call = trace_module_notify,
9684 .priority = 0,
9685 };
9686 #endif /* CONFIG_MODULES */
9687
9688 static __init int tracer_init_tracefs(void)
9689 {
9690 int ret;
9691
9692 trace_access_lock_init();
9693
9694 ret = tracing_init_dentry();
9695 if (ret)
9696 return 0;
9697
9698 event_trace_init();
9699
9700 init_tracer_tracefs(&global_trace, NULL);
9701 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9702
9703 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9704 &global_trace, &tracing_thresh_fops);
9705
9706 trace_create_file("README", TRACE_MODE_READ, NULL,
9707 NULL, &tracing_readme_fops);
9708
9709 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9710 NULL, &tracing_saved_cmdlines_fops);
9711
9712 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9713 NULL, &tracing_saved_cmdlines_size_fops);
9714
9715 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9716 NULL, &tracing_saved_tgids_fops);
9717
9718 trace_eval_init();
9719
9720 trace_create_eval_file(NULL);
9721
9722 #ifdef CONFIG_MODULES
9723 register_module_notifier(&trace_module_nb);
9724 #endif
9725
9726 #ifdef CONFIG_DYNAMIC_FTRACE
9727 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9728 NULL, &tracing_dyn_info_fops);
9729 #endif
9730
9731 create_trace_instances(NULL);
9732
9733 update_tracer_options(&global_trace);
9734
9735 return 0;
9736 }
9737
9738 fs_initcall(tracer_init_tracefs);
9739
9740 static int trace_panic_handler(struct notifier_block *this,
9741 unsigned long event, void *unused)
9742 {
9743 if (ftrace_dump_on_oops)
9744 ftrace_dump(ftrace_dump_on_oops);
9745 return NOTIFY_OK;
9746 }
9747
9748 static struct notifier_block trace_panic_notifier = {
9749 .notifier_call = trace_panic_handler,
9750 .next = NULL,
9751 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9752 };
9753
9754 static int trace_die_handler(struct notifier_block *self,
9755 unsigned long val,
9756 void *data)
9757 {
9758 switch (val) {
9759 case DIE_OOPS:
9760 if (ftrace_dump_on_oops)
9761 ftrace_dump(ftrace_dump_on_oops);
9762 break;
9763 default:
9764 break;
9765 }
9766 return NOTIFY_OK;
9767 }
9768
9769 static struct notifier_block trace_die_notifier = {
9770 .notifier_call = trace_die_handler,
9771 .priority = 200
9772 };
9773
9774 /*
9775 * printk is set to a max of 1024; we really don't need it that big.
9776 * Nothing should be printing 1000 characters anyway.
9777 */
9778 #define TRACE_MAX_PRINT 1000
9779
9780 /*
9781 * Define here KERN_TRACE so that we have one place to modify
9782 * it if we decide to change what log level the ftrace dump
9783 * should be at.
9784 */
9785 #define KERN_TRACE KERN_EMERG
9786
9787 void
9788 trace_printk_seq(struct trace_seq *s)
9789 {
9790 /* Probably should print a warning here. */
9791 if (s->seq.len >= TRACE_MAX_PRINT)
9792 s->seq.len = TRACE_MAX_PRINT;
9793
9794 /*
9795 * More paranoid code. Although the buffer size is set to
9796 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9797 * an extra layer of protection.
9798 */
9799 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9800 s->seq.len = s->seq.size - 1;
9801
9802 /* should be zero terminated, but we are paranoid. */
9803 s->buffer[s->seq.len] = 0;
9804
9805 printk(KERN_TRACE "%s", s->buffer);
9806
9807 trace_seq_init(s);
9808 }
9809
9810 void trace_init_global_iter(struct trace_iterator *iter)
9811 {
9812 iter->tr = &global_trace;
9813 iter->trace = iter->tr->current_trace;
9814 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9815 iter->array_buffer = &global_trace.array_buffer;
9816
9817 if (iter->trace && iter->trace->open)
9818 iter->trace->open(iter);
9819
9820 /* Annotate start of buffers if we had overruns */
9821 if (ring_buffer_overruns(iter->array_buffer->buffer))
9822 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9823
9824 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9825 if (trace_clocks[iter->tr->clock_id].in_ns)
9826 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9827 }
9828
9829 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9830 {
9831 /* use static because iter can be a bit big for the stack */
9832 static struct trace_iterator iter;
9833 static atomic_t dump_running;
9834 struct trace_array *tr = &global_trace;
9835 unsigned int old_userobj;
9836 unsigned long flags;
9837 int cnt = 0, cpu;
9838
9839 /* Only allow one dump user at a time. */
9840 if (atomic_inc_return(&dump_running) != 1) {
9841 atomic_dec(&dump_running);
9842 return;
9843 }
9844
9845 /*
9846 * Always turn off tracing when we dump.
9847 * We don't need to show trace output of what happens
9848 * between multiple crashes.
9849 *
9850 * If the user does a sysrq-z, then they can re-enable
9851 * tracing with echo 1 > tracing_on.
9852 */
9853 tracing_off();
9854
9855 local_irq_save(flags);
9856
9857 /* Simulate the iterator */
9858 trace_init_global_iter(&iter);
9859 /* Can not use kmalloc for iter.temp and iter.fmt */
9860 iter.temp = static_temp_buf;
9861 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9862 iter.fmt = static_fmt_buf;
9863 iter.fmt_size = STATIC_FMT_BUF_SIZE;
9864
9865 for_each_tracing_cpu(cpu) {
9866 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9867 }
9868
9869 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9870
9871 /* don't look at user memory in panic mode */
9872 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9873
9874 switch (oops_dump_mode) {
9875 case DUMP_ALL:
9876 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9877 break;
9878 case DUMP_ORIG:
9879 iter.cpu_file = raw_smp_processor_id();
9880 break;
9881 case DUMP_NONE:
9882 goto out_enable;
9883 default:
9884 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9885 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9886 }
9887
9888 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9889
9890 /* Did function tracer already get disabled? */
9891 if (ftrace_is_dead()) {
9892 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9893 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9894 }
9895
9896 /*
9897 * We need to stop all tracing on all CPUs to read
9898 * the next buffer. This is a bit expensive, but it is
9899 * not done often. We print out all that we can read,
9900 * and then release the locks again.
9901 */
9902
9903 while (!trace_empty(&iter)) {
9904
9905 if (!cnt)
9906 printk(KERN_TRACE "---------------------------------\n");
9907
9908 cnt++;
9909
9910 trace_iterator_reset(&iter);
9911 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9912
9913 if (trace_find_next_entry_inc(&iter) != NULL) {
9914 int ret;
9915
9916 ret = print_trace_line(&iter);
9917 if (ret != TRACE_TYPE_NO_CONSUME)
9918 trace_consume(&iter);
9919 }
9920 touch_nmi_watchdog();
9921
9922 trace_printk_seq(&iter.seq);
9923 }
9924
9925 if (!cnt)
9926 printk(KERN_TRACE " (ftrace buffer empty)\n");
9927 else
9928 printk(KERN_TRACE "---------------------------------\n");
9929
9930 out_enable:
9931 tr->trace_flags |= old_userobj;
9932
9933 for_each_tracing_cpu(cpu) {
9934 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9935 }
9936 atomic_dec(&dump_running);
9937 local_irq_restore(flags);
9938 }
9939 EXPORT_SYMBOL_GPL(ftrace_dump);
9940
9941 #define WRITE_BUFSIZE 4096
9942
9943 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9944 size_t count, loff_t *ppos,
9945 int (*createfn)(const char *))
9946 {
9947 char *kbuf, *buf, *tmp;
9948 int ret = 0;
9949 size_t done = 0;
9950 size_t size;
9951
9952 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9953 if (!kbuf)
9954 return -ENOMEM;
9955
9956 while (done < count) {
9957 size = count - done;
9958
9959 if (size >= WRITE_BUFSIZE)
9960 size = WRITE_BUFSIZE - 1;
9961
9962 if (copy_from_user(kbuf, buffer + done, size)) {
9963 ret = -EFAULT;
9964 goto out;
9965 }
9966 kbuf[size] = '\0';
9967 buf = kbuf;
9968 do {
9969 tmp = strchr(buf, '\n');
9970 if (tmp) {
9971 *tmp = '\0';
9972 size = tmp - buf + 1;
9973 } else {
9974 size = strlen(buf);
9975 if (done + size < count) {
9976 if (buf != kbuf)
9977 break;
9978 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9979 pr_warn("Line length is too long: Should be less than %d\n",
9980 WRITE_BUFSIZE - 2);
9981 ret = -EINVAL;
9982 goto out;
9983 }
9984 }
9985 done += size;
9986
9987 /* Remove comments */
9988 tmp = strchr(buf, '#');
9989
9990 if (tmp)
9991 *tmp = '\0';
9992
9993 ret = createfn(buf);
9994 if (ret)
9995 goto out;
9996 buf += size;
9997
9998 } while (done < count);
9999 }
10000 ret = done;
10001
10002 out:
10003 kfree(kbuf);
10004
10005 return ret;
10006 }
10007
10008 __init static int tracer_alloc_buffers(void)
10009 {
10010 int ring_buf_size;
10011 int ret = -ENOMEM;
10012
10013
10014 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10015 pr_warn("Tracing disabled due to lockdown\n");
10016 return -EPERM;
10017 }
10018
10019 /*
10020 * Make sure we don't accidentally add more trace options
10021 * than we have bits for.
10022 */
10023 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10024
10025 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10026 goto out;
10027
10028 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10029 goto out_free_buffer_mask;
10030
10031 /* Only allocate trace_printk buffers if a trace_printk exists */
10032 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10033 /* Must be called before global_trace.buffer is allocated */
10034 trace_printk_init_buffers();
10035
10036 /* To save memory, keep the ring buffer size to its minimum */
10037 if (ring_buffer_expanded)
10038 ring_buf_size = trace_buf_size;
10039 else
10040 ring_buf_size = 1;
10041
10042 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10043 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10044
10045 raw_spin_lock_init(&global_trace.start_lock);
10046
10047 /*
10048 * The prepare callback allocates some memory for the ring buffer. We
10049 * don't free the buffer if the CPU goes down. If we were to free
10050 * the buffer, then the user would lose any trace that was in the
10051 * buffer. The memory will be removed once the "instance" is removed.
10052 */
10053 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10054 "trace/RB:preapre", trace_rb_cpu_prepare,
10055 NULL);
10056 if (ret < 0)
10057 goto out_free_cpumask;
10058 /* Used for event triggers */
10059 ret = -ENOMEM;
10060 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10061 if (!temp_buffer)
10062 goto out_rm_hp_state;
10063
10064 if (trace_create_savedcmd() < 0)
10065 goto out_free_temp_buffer;
10066
10067 /* TODO: make the number of buffers hot pluggable with CPUS */
10068 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10069 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10070 goto out_free_savedcmd;
10071 }
10072
10073 if (global_trace.buffer_disabled)
10074 tracing_off();
10075
10076 if (trace_boot_clock) {
10077 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10078 if (ret < 0)
10079 pr_warn("Trace clock %s not defined, going back to default\n",
10080 trace_boot_clock);
10081 }
10082
10083 /*
10084 * register_tracer() might reference current_trace, so it
10085 * needs to be set before we register anything. This is
10086 * just a bootstrap of current_trace anyway.
10087 */
10088 global_trace.current_trace = &nop_trace;
10089
10090 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10091
10092 ftrace_init_global_array_ops(&global_trace);
10093
10094 init_trace_flags_index(&global_trace);
10095
10096 register_tracer(&nop_trace);
10097
10098 /* Function tracing may start here (via kernel command line) */
10099 init_function_trace();
10100
10101 /* All seems OK, enable tracing */
10102 tracing_disabled = 0;
10103
10104 atomic_notifier_chain_register(&panic_notifier_list,
10105 &trace_panic_notifier);
10106
10107 register_die_notifier(&trace_die_notifier);
10108
10109 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10110
10111 INIT_LIST_HEAD(&global_trace.systems);
10112 INIT_LIST_HEAD(&global_trace.events);
10113 INIT_LIST_HEAD(&global_trace.hist_vars);
10114 INIT_LIST_HEAD(&global_trace.err_log);
10115 list_add(&global_trace.list, &ftrace_trace_arrays);
10116
10117 apply_trace_boot_options();
10118
10119 register_snapshot_cmd();
10120
10121 test_can_verify();
10122
10123 return 0;
10124
10125 out_free_savedcmd:
10126 free_saved_cmdlines_buffer(savedcmd);
10127 out_free_temp_buffer:
10128 ring_buffer_free(temp_buffer);
10129 out_rm_hp_state:
10130 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10131 out_free_cpumask:
10132 free_cpumask_var(global_trace.tracing_cpumask);
10133 out_free_buffer_mask:
10134 free_cpumask_var(tracing_buffer_mask);
10135 out:
10136 return ret;
10137 }
10138
10139 void __init early_trace_init(void)
10140 {
10141 if (tracepoint_printk) {
10142 tracepoint_print_iter =
10143 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10144 if (MEM_FAIL(!tracepoint_print_iter,
10145 "Failed to allocate trace iterator\n"))
10146 tracepoint_printk = 0;
10147 else
10148 static_key_enable(&tracepoint_printk_key.key);
10149 }
10150 tracer_alloc_buffers();
10151 }
10152
10153 void __init trace_init(void)
10154 {
10155 trace_event_init();
10156 }
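/*
* Editor's note on boot ordering: early_trace_init() and trace_init() are
* both called directly from start_kernel() in init/main.c --
* early_trace_init() first, early enough for trace_printk() and boot-time
* tracer setup, and trace_init() somewhat later, once trace events can be
* registered. late_trace_init() below then runs as a late initcall via
* late_initcall_sync().
*/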
10157
10158 __init static void clear_boot_tracer(void)
10159 {
10160 /*
10161 * The default bootup tracer name is stored in an init-section buffer.
10162 * This function is called from a late initcall. If the boot tracer
10163 * was never registered, clear the pointer out, to prevent a later
10164 * registration from accessing the buffer that is about to be freed.
10165 */
10166
10167 if (!default_bootup_tracer)
10168 return;
10169
10170 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10171 default_bootup_tracer);
10172 default_bootup_tracer = NULL;
10173 }
10174
10175 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10176 __init static void tracing_set_default_clock(void)
10177 {
10178 /* sched_clock_stable() is determined in late_initcall */
10179 if (!trace_boot_clock && !sched_clock_stable()) {
10180 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10181 pr_warn("Can not set tracing clock due to lockdown\n");
10182 return;
10183 }
10184
10185 printk(KERN_WARNING
10186 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10187 "If you want to keep using the local clock, then add:\n"
10188 " \"trace_clock=local\"\n"
10189 "on the kernel command line\n");
10190 tracing_set_clock(&global_trace, "global");
10191 }
10192 }
10193 #else
10194 static inline void tracing_set_default_clock(void) { }
10195 #endif
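/*
* Editor's note: the boot clock can also be chosen explicitly with the
* trace_clock= kernel parameter (parsed earlier in this file into
* trace_boot_clock), or at run time by writing one of the listed clocks
* to the tracefs trace_clock file, e.g. "echo global > trace_clock" in
* /sys/kernel/tracing.
*/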
10196
10197 __init static int late_trace_init(void)
10198 {
10199 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10200 static_key_disable(&tracepoint_printk_key.key);
10201 tracepoint_printk = 0;
10202 }
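/*
* Editor's note: this handles the "tracepoint printk only during boot"
* request -- if the user enabled piping tracepoints to printk but asked
* for it to stop at boot, the static key is switched back off here, once
* late initcalls run, so tracepoints are no longer echoed to the console.
*/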
10203
10204 tracing_set_default_clock();
10205 clear_boot_tracer();
10206 return 0;
10207 }
10208
10209 late_initcall_sync(late_trace_init);