1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing.
51 */
52 bool ring_buffer_expanded;
53
54 /*
55 * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60 */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64 * If a tracer is running, we do not want to run SELFTEST.
65 */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 { }
75 };
76
77 static struct tracer_flags dummy_tracer_flags = {
78 .val = 0,
79 .opts = dummy_tracer_opt
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85 return 0;
86 }
87
88 /*
89 * To prevent the comm cache from being overwritten when no
90 * tracing is active, only save the comm when a trace event
91  * occurs.
92 */
93 static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95 /*
96 * Kill all tracing for good (never come back).
97  * It is initialized to 1 and turns to zero only when the
98  * initialization of the tracer succeeds; that is the only place
99  * that sets it back to zero.
100 */
101 static int tracing_disabled = 1;
102
103 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
104
105 cpumask_var_t __read_mostly tracing_buffer_mask;
106
107 /*
108 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
109 *
110 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
111 * is set, then ftrace_dump is called. This will output the contents
112 * of the ftrace buffers to the console. This is very useful for
113  * capturing traces that lead to crashes and outputting them to a
114  * serial console.
115  *
116  * It is off by default, but you can enable it either by specifying
117  * "ftrace_dump_on_oops" on the kernel command line, or by setting
118  * /proc/sys/kernel/ftrace_dump_on_oops.
119  * Set it to 1 to dump the buffers of all CPUs.
120  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
121 */
122
123 enum ftrace_dump_mode ftrace_dump_on_oops;
124
125 /* When set, tracing will stop when a WARN*() is hit */
126 int __disable_trace_on_warning;
127
128 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
129
130 #define MAX_TRACER_SIZE 100
131 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
132 static char *default_bootup_tracer;
133
134 static bool allocate_snapshot;
135
136 static int __init set_cmdline_ftrace(char *str)
137 {
138 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
139 default_bootup_tracer = bootup_tracer_buf;
140 /* We are using ftrace early, expand it */
141 ring_buffer_expanded = true;
142 return 1;
143 }
144 __setup("ftrace=", set_cmdline_ftrace);
145
146 static int __init set_ftrace_dump_on_oops(char *str)
147 {
148 if (*str++ != '=' || !*str) {
149 ftrace_dump_on_oops = DUMP_ALL;
150 return 1;
151 }
152
153 if (!strcmp("orig_cpu", str)) {
154 ftrace_dump_on_oops = DUMP_ORIG;
155 return 1;
156 }
157
158 return 0;
159 }
160 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
161
162 static int __init stop_trace_on_warning(char *str)
163 {
164 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
165 __disable_trace_on_warning = 1;
166 return 1;
167 }
168 __setup("traceoff_on_warning", stop_trace_on_warning);
169
170 static int __init boot_alloc_snapshot(char *str)
171 {
172 allocate_snapshot = true;
173 /* We also need the main ring buffer expanded */
174 ring_buffer_expanded = true;
175 return 1;
176 }
177 __setup("alloc_snapshot", boot_alloc_snapshot);
178
179
180 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
181 static char *trace_boot_options __initdata;
182
183 static int __init set_trace_boot_options(char *str)
184 {
185 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
186 trace_boot_options = trace_boot_options_buf;
187 return 0;
188 }
189 __setup("trace_options=", set_trace_boot_options);
190
191 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
192 static char *trace_boot_clock __initdata;
193
194 static int __init set_trace_boot_clock(char *str)
195 {
196 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
197 trace_boot_clock = trace_boot_clock_buf;
198 return 0;
199 }
200 __setup("trace_clock=", set_trace_boot_clock);
201
202 static int __init set_tracepoint_printk(char *str)
203 {
204 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
205 tracepoint_printk = 1;
206 return 1;
207 }
208 __setup("tp_printk", set_tracepoint_printk);
209
210 unsigned long long ns2usecs(cycle_t nsec)
211 {
212 nsec += 500;
213 do_div(nsec, 1000);
214 return nsec;
215 }
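/*
 * A quick worked example of the rounding above: ns2usecs(1499) == 1,
 * while ns2usecs(1500) == 2, because adding 500 before the divide
 * rounds to the nearest microsecond instead of truncating.
 */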
216
217 /*
218 * The global_trace is the descriptor that holds the tracing
219 * buffers for the live tracing. For each CPU, it contains
220  * a linked list of pages that will store trace entries. The
221  * page descriptors of the pages in memory are used to hold
222  * this linked list, by linking the lru item of each page
223  * descriptor to the pages in the per-CPU buffer.
224 *
225 * For each active CPU there is a data field that holds the
226 * pages for the buffer for that CPU. Each CPU has the same number
227 * of pages allocated for its buffer.
228 */
229 static struct trace_array global_trace;
230
231 LIST_HEAD(ftrace_trace_arrays);
232
233 int trace_array_get(struct trace_array *this_tr)
234 {
235 struct trace_array *tr;
236 int ret = -ENODEV;
237
238 mutex_lock(&trace_types_lock);
239 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
240 if (tr == this_tr) {
241 tr->ref++;
242 ret = 0;
243 break;
244 }
245 }
246 mutex_unlock(&trace_types_lock);
247
248 return ret;
249 }
250
251 static void __trace_array_put(struct trace_array *this_tr)
252 {
253 WARN_ON(!this_tr->ref);
254 this_tr->ref--;
255 }
256
257 void trace_array_put(struct trace_array *this_tr)
258 {
259 mutex_lock(&trace_types_lock);
260 __trace_array_put(this_tr);
261 mutex_unlock(&trace_types_lock);
262 }
263
264 int filter_check_discard(struct ftrace_event_file *file, void *rec,
265 struct ring_buffer *buffer,
266 struct ring_buffer_event *event)
267 {
268 if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
269 !filter_match_preds(file->filter, rec)) {
270 ring_buffer_discard_commit(buffer, event);
271 return 1;
272 }
273
274 return 0;
275 }
276 EXPORT_SYMBOL_GPL(filter_check_discard);
277
278 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
279 struct ring_buffer *buffer,
280 struct ring_buffer_event *event)
281 {
282 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
283 !filter_match_preds(call->filter, rec)) {
284 ring_buffer_discard_commit(buffer, event);
285 return 1;
286 }
287
288 return 0;
289 }
290 EXPORT_SYMBOL_GPL(call_filter_check_discard);
291
292 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
293 {
294 u64 ts;
295
296 /* Early boot up does not have a buffer yet */
297 if (!buf->buffer)
298 return trace_clock_local();
299
300 ts = ring_buffer_time_stamp(buf->buffer, cpu);
301 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
302
303 return ts;
304 }
305
306 cycle_t ftrace_now(int cpu)
307 {
308 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
309 }
310
311 /**
312  * tracing_is_enabled - Show if global_trace has been enabled
313  *
314  * Shows if the global trace has been enabled or not. It uses the
315  * mirror flag "buffer_disabled" so it can be used in fast paths such as
316 * the irqsoff tracer. But it may be inaccurate due to races. If you
317 * need to know the accurate state, use tracing_is_on() which is a little
318 * slower, but accurate.
319 */
320 int tracing_is_enabled(void)
321 {
322 /*
323 * For quick access (irqsoff uses this in fast path), just
324 * return the mirror variable of the state of the ring buffer.
325 * It's a little racy, but we don't really care.
326 */
327 smp_rmb();
328 return !global_trace.buffer_disabled;
329 }
330
331 /*
332 * trace_buf_size is the size in bytes that is allocated
333 * for a buffer. Note, the number of bytes is always rounded
334 * to page size.
335 *
336  * This number is purposely set to a low value of 16384 so that,
337  * if a dump on oops happens, you do not have to wait for an
338  * enormous amount of output. It is configurable at both boot
339  * time and run time anyway.
340 */
341 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
342
343 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
344
345 /* trace_types holds a link list of available tracers. */
346 static struct tracer *trace_types __read_mostly;
347
348 /*
349 * trace_types_lock is used to protect the trace_types list.
350 */
351 DEFINE_MUTEX(trace_types_lock);
352
353 /*
354 * serialize the access of the ring buffer
355 *
356  * The ring buffer serializes readers, but that is only low-level
357  * protection. The validity of the events (returned by ring_buffer_peek()
358  * etc.) is not protected by the ring buffer.
359  *
360  * The content of events may become garbage if we allow another process to
361  * consume these events concurrently:
362  * A) the page of the consumed events may become a normal page
363  * (not a reader page) in the ring buffer, and this page will be rewritten
364  * by the event producer.
365  * B) the page of the consumed events may become a page for splice_read,
366  * and this page will be returned to the system.
367  *
368  * These primitives allow multiple processes to access different per-CPU
369  * ring buffers concurrently.
370  *
371  * These primitives don't distinguish read-only and read-consume access.
372  * Multiple read-only accesses are also serialized.
373 */
374
375 #ifdef CONFIG_SMP
376 static DECLARE_RWSEM(all_cpu_access_lock);
377 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
378
379 static inline void trace_access_lock(int cpu)
380 {
381 if (cpu == RING_BUFFER_ALL_CPUS) {
382 /* gain it for accessing the whole ring buffer. */
383 down_write(&all_cpu_access_lock);
384 } else {
385 /* gain it for accessing a cpu ring buffer. */
386
387 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
388 down_read(&all_cpu_access_lock);
389
390 /* Secondly block other access to this @cpu ring buffer. */
391 mutex_lock(&per_cpu(cpu_access_lock, cpu));
392 }
393 }
394
395 static inline void trace_access_unlock(int cpu)
396 {
397 if (cpu == RING_BUFFER_ALL_CPUS) {
398 up_write(&all_cpu_access_lock);
399 } else {
400 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
401 up_read(&all_cpu_access_lock);
402 }
403 }
404
405 static inline void trace_access_lock_init(void)
406 {
407 int cpu;
408
409 for_each_possible_cpu(cpu)
410 mutex_init(&per_cpu(cpu_access_lock, cpu));
411 }
412
413 #else
414
415 static DEFINE_MUTEX(access_lock);
416
417 static inline void trace_access_lock(int cpu)
418 {
419 (void)cpu;
420 mutex_lock(&access_lock);
421 }
422
423 static inline void trace_access_unlock(int cpu)
424 {
425 (void)cpu;
426 mutex_unlock(&access_lock);
427 }
428
429 static inline void trace_access_lock_init(void)
430 {
431 }
432
433 #endif
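/*
 * Sketch of the intended calling pattern for the primitives above
 * (illustrative, not a new API): a reader of a single per-cpu buffer
 * brackets its accesses with
 *
 *	trace_access_lock(cpu);
 *	... consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS instead takes the access lock
 * exclusively across every per-cpu buffer at once.
 */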
434
435 /* trace_flags holds trace_options default values */
436 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
437 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
438 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
439 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
440
441 static void tracer_tracing_on(struct trace_array *tr)
442 {
443 if (tr->trace_buffer.buffer)
444 ring_buffer_record_on(tr->trace_buffer.buffer);
445 /*
446 * This flag is looked at when buffers haven't been allocated
447 	 * yet, or by some tracers (like irqsoff) that just want to
448 	 * know if the ring buffer has been disabled, but can handle
449 	 * races where it gets disabled while we still do a record.
450 * As the check is in the fast path of the tracers, it is more
451 * important to be fast than accurate.
452 */
453 tr->buffer_disabled = 0;
454 /* Make the flag seen by readers */
455 smp_wmb();
456 }
457
458 /**
459 * tracing_on - enable tracing buffers
460 *
461 * This function enables tracing buffers that may have been
462 * disabled with tracing_off.
463 */
464 void tracing_on(void)
465 {
466 tracer_tracing_on(&global_trace);
467 }
468 EXPORT_SYMBOL_GPL(tracing_on);
469
470 /**
471 * __trace_puts - write a constant string into the trace buffer.
472 * @ip: The address of the caller
473 * @str: The constant string to write
474 * @size: The size of the string.
475 */
476 int __trace_puts(unsigned long ip, const char *str, int size)
477 {
478 struct ring_buffer_event *event;
479 struct ring_buffer *buffer;
480 struct print_entry *entry;
481 unsigned long irq_flags;
482 int alloc;
483 int pc;
484
485 if (!(trace_flags & TRACE_ITER_PRINTK))
486 return 0;
487
488 pc = preempt_count();
489
490 if (unlikely(tracing_selftest_running || tracing_disabled))
491 return 0;
492
493 alloc = sizeof(*entry) + size + 2; /* possible \n added */
494
495 local_save_flags(irq_flags);
496 buffer = global_trace.trace_buffer.buffer;
497 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
498 irq_flags, pc);
499 if (!event)
500 return 0;
501
502 entry = ring_buffer_event_data(event);
503 entry->ip = ip;
504
505 memcpy(&entry->buf, str, size);
506
507 /* Add a newline if necessary */
508 if (entry->buf[size - 1] != '\n') {
509 entry->buf[size] = '\n';
510 entry->buf[size + 1] = '\0';
511 } else
512 entry->buf[size] = '\0';
513
514 __buffer_unlock_commit(buffer, event);
515 ftrace_trace_stack(buffer, irq_flags, 4, pc);
516
517 return size;
518 }
519 EXPORT_SYMBOL_GPL(__trace_puts);
520
521 /**
522 * __trace_bputs - write the pointer to a constant string into trace buffer
523 * @ip: The address of the caller
524  * @str: The constant string to write to the buffer
525 */
526 int __trace_bputs(unsigned long ip, const char *str)
527 {
528 struct ring_buffer_event *event;
529 struct ring_buffer *buffer;
530 struct bputs_entry *entry;
531 unsigned long irq_flags;
532 int size = sizeof(struct bputs_entry);
533 int pc;
534
535 if (!(trace_flags & TRACE_ITER_PRINTK))
536 return 0;
537
538 pc = preempt_count();
539
540 if (unlikely(tracing_selftest_running || tracing_disabled))
541 return 0;
542
543 local_save_flags(irq_flags);
544 buffer = global_trace.trace_buffer.buffer;
545 event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
546 irq_flags, pc);
547 if (!event)
548 return 0;
549
550 entry = ring_buffer_event_data(event);
551 entry->ip = ip;
552 entry->str = str;
553
554 __buffer_unlock_commit(buffer, event);
555 ftrace_trace_stack(buffer, irq_flags, 4, pc);
556
557 return 1;
558 }
559 EXPORT_SYMBOL_GPL(__trace_bputs);
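/*
 * Callers normally do not invoke __trace_puts() or __trace_bputs()
 * directly; the trace_puts() macro in the core headers is the usual
 * entry point and picks between them depending on whether the string
 * is a built-in constant, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 */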
560
561 #ifdef CONFIG_TRACER_SNAPSHOT
562 /**
563  * tracing_snapshot - take a snapshot of the current buffer.
564 *
565 * This causes a swap between the snapshot buffer and the current live
566 * tracing buffer. You can use this to take snapshots of the live
567 * trace when some condition is triggered, but continue to trace.
568 *
569 * Note, make sure to allocate the snapshot with either
570 * a tracing_snapshot_alloc(), or by doing it manually
571 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
572 *
573  * If the snapshot buffer is not allocated, this will stop tracing,
574  * basically making a permanent snapshot.
575 */
576 void tracing_snapshot(void)
577 {
578 struct trace_array *tr = &global_trace;
579 struct tracer *tracer = tr->current_trace;
580 unsigned long flags;
581
582 if (in_nmi()) {
583 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
584 internal_trace_puts("*** snapshot is being ignored ***\n");
585 return;
586 }
587
588 if (!tr->allocated_snapshot) {
589 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
590 internal_trace_puts("*** stopping trace here! ***\n");
591 tracing_off();
592 return;
593 }
594
595 /* Note, snapshot can not be used when the tracer uses it */
596 if (tracer->use_max_tr) {
597 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
598 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
599 return;
600 }
601
602 local_irq_save(flags);
603 update_max_tr(tr, current, smp_processor_id());
604 local_irq_restore(flags);
605 }
606 EXPORT_SYMBOL_GPL(tracing_snapshot);
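/*
 * Illustrative sketch only (the condition below is hypothetical): with
 * the snapshot buffer allocated, a caller can freeze a copy of the live
 * trace when a rare condition hits, while tracing itself continues:
 *
 *	if (unlikely(rare_condition_seen))
 *		tracing_snapshot();
 */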
607
608 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
609 struct trace_buffer *size_buf, int cpu_id);
610 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
611
612 static int alloc_snapshot(struct trace_array *tr)
613 {
614 int ret;
615
616 if (!tr->allocated_snapshot) {
617
618 /* allocate spare buffer */
619 ret = resize_buffer_duplicate_size(&tr->max_buffer,
620 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
621 if (ret < 0)
622 return ret;
623
624 tr->allocated_snapshot = true;
625 }
626
627 return 0;
628 }
629
630 static void free_snapshot(struct trace_array *tr)
631 {
632 /*
633 	 * We don't free the ring buffer; instead, we resize it, because
634 	 * the max_tr ring buffer has some state (e.g. ring->clock) that
635 	 * we want to preserve.
636 */
637 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
638 set_buffer_entries(&tr->max_buffer, 1);
639 tracing_reset_online_cpus(&tr->max_buffer);
640 tr->allocated_snapshot = false;
641 }
642
643 /**
644 * tracing_alloc_snapshot - allocate snapshot buffer.
645 *
646 * This only allocates the snapshot buffer if it isn't already
647 * allocated - it doesn't also take a snapshot.
648 *
649 * This is meant to be used in cases where the snapshot buffer needs
650 * to be set up for events that can't sleep but need to be able to
651 * trigger a snapshot.
652 */
653 int tracing_alloc_snapshot(void)
654 {
655 struct trace_array *tr = &global_trace;
656 int ret;
657
658 ret = alloc_snapshot(tr);
659 WARN_ON(ret < 0);
660
661 return ret;
662 }
663 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
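/*
 * Sketch of the intended split: allocate from a context that may sleep
 * (for example module init), then take the snapshot later from a
 * context that can not sleep:
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	tracing_snapshot();
 */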
664
665 /**
666  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
667  *
668  * This is similar to tracing_snapshot(), but it will allocate the
669 * snapshot buffer if it isn't already allocated. Use this only
670 * where it is safe to sleep, as the allocation may sleep.
671 *
672 * This causes a swap between the snapshot buffer and the current live
673 * tracing buffer. You can use this to take snapshots of the live
674 * trace when some condition is triggered, but continue to trace.
675 */
676 void tracing_snapshot_alloc(void)
677 {
678 int ret;
679
680 ret = tracing_alloc_snapshot();
681 if (ret < 0)
682 return;
683
684 tracing_snapshot();
685 }
686 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
687 #else
688 void tracing_snapshot(void)
689 {
690 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
691 }
692 EXPORT_SYMBOL_GPL(tracing_snapshot);
693 int tracing_alloc_snapshot(void)
694 {
695 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
696 return -ENODEV;
697 }
698 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
699 void tracing_snapshot_alloc(void)
700 {
701 /* Give warning */
702 tracing_snapshot();
703 }
704 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
705 #endif /* CONFIG_TRACER_SNAPSHOT */
706
707 static void tracer_tracing_off(struct trace_array *tr)
708 {
709 if (tr->trace_buffer.buffer)
710 ring_buffer_record_off(tr->trace_buffer.buffer);
711 /*
712 * This flag is looked at when buffers haven't been allocated
713 	 * yet, or by some tracers (like irqsoff) that just want to
714 	 * know if the ring buffer has been disabled, but can handle
715 	 * races where it gets disabled while we still do a record.
716 * As the check is in the fast path of the tracers, it is more
717 * important to be fast than accurate.
718 */
719 tr->buffer_disabled = 1;
720 /* Make the flag seen by readers */
721 smp_wmb();
722 }
723
724 /**
725 * tracing_off - turn off tracing buffers
726 *
727 * This function stops the tracing buffers from recording data.
728 * It does not disable any overhead the tracers themselves may
729 * be causing. This function simply causes all recording to
730 * the ring buffers to fail.
731 */
732 void tracing_off(void)
733 {
734 tracer_tracing_off(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_off);
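/*
 * Illustrative sketch (the condition is hypothetical): a common
 * debugging pattern is to stop the ring buffers the moment a bad state
 * is observed, so the trace leading up to it is preserved:
 *
 *	if (WARN_ON_ONCE(bad_state))
 *		tracing_off();
 *
 * Recording can be re-enabled later with tracing_on() or by writing 1
 * to the tracing_on file in tracefs.
 */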
737
738 void disable_trace_on_warning(void)
739 {
740 if (__disable_trace_on_warning)
741 tracing_off();
742 }
743
744 /**
745  * tracer_tracing_is_on - show the real state of the ring buffer
746  * @tr: the trace array to check
747  *
748  * Shows the real state of the ring buffer: whether it is enabled or not.
749 */
750 static int tracer_tracing_is_on(struct trace_array *tr)
751 {
752 if (tr->trace_buffer.buffer)
753 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
754 return !tr->buffer_disabled;
755 }
756
757 /**
758 * tracing_is_on - show state of ring buffers enabled
759 */
760 int tracing_is_on(void)
761 {
762 return tracer_tracing_is_on(&global_trace);
763 }
764 EXPORT_SYMBOL_GPL(tracing_is_on);
765
766 static int __init set_buf_size(char *str)
767 {
768 unsigned long buf_size;
769
770 if (!str)
771 return 0;
772 buf_size = memparse(str, &str);
773 /* nr_entries can not be zero */
774 if (buf_size == 0)
775 return 0;
776 trace_buf_size = buf_size;
777 return 1;
778 }
779 __setup("trace_buf_size=", set_buf_size);
780
781 static int __init set_tracing_thresh(char *str)
782 {
783 unsigned long threshold;
784 int ret;
785
786 if (!str)
787 return 0;
788 ret = kstrtoul(str, 0, &threshold);
789 if (ret < 0)
790 return 0;
791 tracing_thresh = threshold * 1000;
792 return 1;
793 }
794 __setup("tracing_thresh=", set_tracing_thresh);
795
796 unsigned long nsecs_to_usecs(unsigned long nsecs)
797 {
798 return nsecs / 1000;
799 }
800
801 /* These must match the bit positions in trace_iterator_flags */
802 static const char *trace_options[] = {
803 "print-parent",
804 "sym-offset",
805 "sym-addr",
806 "verbose",
807 "raw",
808 "hex",
809 "bin",
810 "block",
811 "stacktrace",
812 "trace_printk",
813 "ftrace_preempt",
814 "branch",
815 "annotate",
816 "userstacktrace",
817 "sym-userobj",
818 "printk-msg-only",
819 "context-info",
820 "latency-format",
821 "sleep-time",
822 "graph-time",
823 "record-cmd",
824 "overwrite",
825 "disable_on_free",
826 "irq-info",
827 "markers",
828 "function-trace",
829 NULL
830 };
831
832 static struct {
833 u64 (*func)(void);
834 const char *name;
835 int in_ns; /* is this clock in nanoseconds? */
836 } trace_clocks[] = {
837 { trace_clock_local, "local", 1 },
838 { trace_clock_global, "global", 1 },
839 { trace_clock_counter, "counter", 0 },
840 { trace_clock_jiffies, "uptime", 0 },
841 { trace_clock, "perf", 1 },
842 { ktime_get_mono_fast_ns, "mono", 1 },
843 ARCH_TRACE_CLOCKS
844 };
845
846 /*
847 * trace_parser_get_init - gets the buffer for trace parser
848 */
849 int trace_parser_get_init(struct trace_parser *parser, int size)
850 {
851 memset(parser, 0, sizeof(*parser));
852
853 parser->buffer = kmalloc(size, GFP_KERNEL);
854 if (!parser->buffer)
855 return 1;
856
857 parser->size = size;
858 return 0;
859 }
860
861 /*
862 * trace_parser_put - frees the buffer for trace parser
863 */
864 void trace_parser_put(struct trace_parser *parser)
865 {
866 kfree(parser->buffer);
867 }
868
869 /*
870 * trace_get_user - reads the user input string separated by space
871 * (matched by isspace(ch))
872 *
873 * For each string found the 'struct trace_parser' is updated,
874 * and the function returns.
875 *
876 * Returns number of bytes read.
877 *
878 * See kernel/trace/trace.h for 'struct trace_parser' details.
879 */
880 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
881 size_t cnt, loff_t *ppos)
882 {
883 char ch;
884 size_t read = 0;
885 ssize_t ret;
886
887 if (!*ppos)
888 trace_parser_clear(parser);
889
890 ret = get_user(ch, ubuf++);
891 if (ret)
892 goto out;
893
894 read++;
895 cnt--;
896
897 /*
898 	 * If the parser has not finished with the last write,
899 	 * continue reading the user input without skipping spaces.
900 */
901 if (!parser->cont) {
902 /* skip white space */
903 while (cnt && isspace(ch)) {
904 ret = get_user(ch, ubuf++);
905 if (ret)
906 goto out;
907 read++;
908 cnt--;
909 }
910
911 /* only spaces were written */
912 if (isspace(ch)) {
913 *ppos += read;
914 ret = read;
915 goto out;
916 }
917
918 parser->idx = 0;
919 }
920
921 /* read the non-space input */
922 while (cnt && !isspace(ch)) {
923 if (parser->idx < parser->size - 1)
924 parser->buffer[parser->idx++] = ch;
925 else {
926 ret = -EINVAL;
927 goto out;
928 }
929 ret = get_user(ch, ubuf++);
930 if (ret)
931 goto out;
932 read++;
933 cnt--;
934 }
935
936 /* We either got finished input or we have to wait for another call. */
937 if (isspace(ch)) {
938 parser->buffer[parser->idx] = 0;
939 parser->cont = false;
940 } else if (parser->idx < parser->size - 1) {
941 parser->cont = true;
942 parser->buffer[parser->idx++] = ch;
943 } else {
944 ret = -EINVAL;
945 goto out;
946 }
947
948 *ppos += read;
949 ret = read;
950
951 out:
952 return ret;
953 }
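/*
 * Sketch of the usual calling pattern for the parser above, roughly as
 * the write handlers of the filter files use it (error handling
 * trimmed for brevity):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		... act on the NUL-terminated word in parser.buffer ...
 *	trace_parser_put(&parser);
 */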
954
955 /* TODO add a seq_buf_to_buffer() */
956 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
957 {
958 int len;
959
960 if (trace_seq_used(s) <= s->seq.readpos)
961 return -EBUSY;
962
963 len = trace_seq_used(s) - s->seq.readpos;
964 if (cnt > len)
965 cnt = len;
966 memcpy(buf, s->buffer + s->seq.readpos, cnt);
967
968 s->seq.readpos += cnt;
969 return cnt;
970 }
971
972 unsigned long __read_mostly tracing_thresh;
973
974 #ifdef CONFIG_TRACER_MAX_TRACE
975 /*
976 * Copy the new maximum trace into the separate maximum-trace
977 * structure. (this way the maximum trace is permanently saved,
978 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979 */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983 struct trace_buffer *trace_buf = &tr->trace_buffer;
984 struct trace_buffer *max_buf = &tr->max_buffer;
985 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988 max_buf->cpu = cpu;
989 max_buf->time_start = data->preempt_timestamp;
990
991 max_data->saved_latency = tr->max_latency;
992 max_data->critical_start = data->critical_start;
993 max_data->critical_end = data->critical_end;
994
995 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996 max_data->pid = tsk->pid;
997 /*
998 * If tsk == current, then use current_uid(), as that does not use
999 * RCU. The irq tracer can be called out of RCU scope.
1000 */
1001 if (tsk == current)
1002 max_data->uid = current_uid();
1003 else
1004 max_data->uid = task_uid(tsk);
1005
1006 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007 max_data->policy = tsk->policy;
1008 max_data->rt_priority = tsk->rt_priority;
1009
1010 	/* record this task's comm */
1011 tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016 * @tr: tracer
1017 * @tsk: the task with the latency
1018 * @cpu: The cpu that initiated the trace.
1019 *
1020 * Flip the buffers between the @tr and the max_tr and record information
1021 * about which task was the cause of this latency.
1022 */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026 struct ring_buffer *buf;
1027
1028 if (tr->stop_count)
1029 return;
1030
1031 WARN_ON_ONCE(!irqs_disabled());
1032
1033 if (!tr->allocated_snapshot) {
1034 /* Only the nop tracer should hit this when disabling */
1035 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036 return;
1037 }
1038
1039 arch_spin_lock(&tr->max_lock);
1040
1041 buf = tr->trace_buffer.buffer;
1042 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043 tr->max_buffer.buffer = buf;
1044
1045 __update_max_tr(tr, tsk, cpu);
1046 arch_spin_unlock(&tr->max_lock);
1047 }
1048
1049 /**
1050 * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr: tracer
1052  * @tsk: task with the latency
1053  * @cpu: the cpu of the buffer to copy.
1054 *
1055 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056 */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060 int ret;
1061
1062 if (tr->stop_count)
1063 return;
1064
1065 WARN_ON_ONCE(!irqs_disabled());
1066 if (!tr->allocated_snapshot) {
1067 /* Only the nop tracer should hit this when disabling */
1068 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069 return;
1070 }
1071
1072 arch_spin_lock(&tr->max_lock);
1073
1074 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076 if (ret == -EBUSY) {
1077 /*
1078 * We failed to swap the buffer due to a commit taking
1079 * place on this CPU. We fail to record, but we reset
1080 * the max trace buffer (no one writes directly to it)
1081 * and flag that it failed.
1082 */
1083 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084 "Failed to swap buffers due to commit in progress\n");
1085 }
1086
1087 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089 __update_max_tr(tr, tsk, cpu);
1090 arch_spin_unlock(&tr->max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1095 {
1096 /* Iterators are static, they should be filled or empty */
1097 if (trace_buffer_iter(iter, iter->cpu_file))
1098 return 0;
1099
1100 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1101 full);
1102 }
1103
1104 #ifdef CONFIG_FTRACE_STARTUP_TEST
1105 static int run_tracer_selftest(struct tracer *type)
1106 {
1107 struct trace_array *tr = &global_trace;
1108 struct tracer *saved_tracer = tr->current_trace;
1109 int ret;
1110
1111 if (!type->selftest || tracing_selftest_disabled)
1112 return 0;
1113
1114 /*
1115 * Run a selftest on this tracer.
1116 * Here we reset the trace buffer, and set the current
1117 * tracer to be this tracer. The tracer can then run some
1118 * internal tracing to verify that everything is in order.
1119 * If we fail, we do not register this tracer.
1120 */
1121 tracing_reset_online_cpus(&tr->trace_buffer);
1122
1123 tr->current_trace = type;
1124
1125 #ifdef CONFIG_TRACER_MAX_TRACE
1126 if (type->use_max_tr) {
1127 /* If we expanded the buffers, make sure the max is expanded too */
1128 if (ring_buffer_expanded)
1129 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1130 RING_BUFFER_ALL_CPUS);
1131 tr->allocated_snapshot = true;
1132 }
1133 #endif
1134
1135 /* the test is responsible for initializing and enabling */
1136 pr_info("Testing tracer %s: ", type->name);
1137 ret = type->selftest(type, tr);
1138 /* the test is responsible for resetting too */
1139 tr->current_trace = saved_tracer;
1140 if (ret) {
1141 printk(KERN_CONT "FAILED!\n");
1142 /* Add the warning after printing 'FAILED' */
1143 WARN_ON(1);
1144 return -1;
1145 }
1146 /* Only reset on passing, to avoid touching corrupted buffers */
1147 tracing_reset_online_cpus(&tr->trace_buffer);
1148
1149 #ifdef CONFIG_TRACER_MAX_TRACE
1150 if (type->use_max_tr) {
1151 tr->allocated_snapshot = false;
1152
1153 /* Shrink the max buffer again */
1154 if (ring_buffer_expanded)
1155 ring_buffer_resize(tr->max_buffer.buffer, 1,
1156 RING_BUFFER_ALL_CPUS);
1157 }
1158 #endif
1159
1160 printk(KERN_CONT "PASSED\n");
1161 return 0;
1162 }
1163 #else
1164 static inline int run_tracer_selftest(struct tracer *type)
1165 {
1166 return 0;
1167 }
1168 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1169
1170 /**
1171 * register_tracer - register a tracer with the ftrace system.
1172  * @type: the plugin for the tracer
1173 *
1174 * Register a new plugin tracer.
1175 */
1176 int register_tracer(struct tracer *type)
1177 {
1178 struct tracer *t;
1179 int ret = 0;
1180
1181 if (!type->name) {
1182 pr_info("Tracer must have a name\n");
1183 return -1;
1184 }
1185
1186 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1187 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1188 return -1;
1189 }
1190
1191 mutex_lock(&trace_types_lock);
1192
1193 tracing_selftest_running = true;
1194
1195 for (t = trace_types; t; t = t->next) {
1196 if (strcmp(type->name, t->name) == 0) {
1197 /* already found */
1198 pr_info("Tracer %s already registered\n",
1199 type->name);
1200 ret = -1;
1201 goto out;
1202 }
1203 }
1204
1205 if (!type->set_flag)
1206 type->set_flag = &dummy_set_flag;
1207 if (!type->flags)
1208 type->flags = &dummy_tracer_flags;
1209 else
1210 if (!type->flags->opts)
1211 type->flags->opts = dummy_tracer_opt;
1212
1213 ret = run_tracer_selftest(type);
1214 if (ret < 0)
1215 goto out;
1216
1217 type->next = trace_types;
1218 trace_types = type;
1219
1220 out:
1221 tracing_selftest_running = false;
1222 mutex_unlock(&trace_types_lock);
1223
1224 if (ret || !default_bootup_tracer)
1225 goto out_unlock;
1226
1227 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1228 goto out_unlock;
1229
1230 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1231 /* Do we want this tracer to start on bootup? */
1232 tracing_set_tracer(&global_trace, type->name);
1233 default_bootup_tracer = NULL;
1234 	/* disable other selftests, since this tracer would break them */
1235 tracing_selftest_disabled = true;
1236 #ifdef CONFIG_FTRACE_STARTUP_TEST
1237 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1238 type->name);
1239 #endif
1240
1241 out_unlock:
1242 return ret;
1243 }
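/*
 * Sketch of a minimal tracer plugin registration (all names below are
 * illustrative, not an existing tracer):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */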
1244
1245 void tracing_reset(struct trace_buffer *buf, int cpu)
1246 {
1247 struct ring_buffer *buffer = buf->buffer;
1248
1249 if (!buffer)
1250 return;
1251
1252 ring_buffer_record_disable(buffer);
1253
1254 /* Make sure all commits have finished */
1255 synchronize_sched();
1256 ring_buffer_reset_cpu(buffer, cpu);
1257
1258 ring_buffer_record_enable(buffer);
1259 }
1260
1261 void tracing_reset_online_cpus(struct trace_buffer *buf)
1262 {
1263 struct ring_buffer *buffer = buf->buffer;
1264 int cpu;
1265
1266 if (!buffer)
1267 return;
1268
1269 ring_buffer_record_disable(buffer);
1270
1271 /* Make sure all commits have finished */
1272 synchronize_sched();
1273
1274 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1275
1276 for_each_online_cpu(cpu)
1277 ring_buffer_reset_cpu(buffer, cpu);
1278
1279 ring_buffer_record_enable(buffer);
1280 }
1281
1282 /* Must have trace_types_lock held */
1283 void tracing_reset_all_online_cpus(void)
1284 {
1285 struct trace_array *tr;
1286
1287 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1288 tracing_reset_online_cpus(&tr->trace_buffer);
1289 #ifdef CONFIG_TRACER_MAX_TRACE
1290 tracing_reset_online_cpus(&tr->max_buffer);
1291 #endif
1292 }
1293 }
1294
1295 #define SAVED_CMDLINES_DEFAULT 128
1296 #define NO_CMDLINE_MAP UINT_MAX
1297 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1298 struct saved_cmdlines_buffer {
1299 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1300 unsigned *map_cmdline_to_pid;
1301 unsigned cmdline_num;
1302 int cmdline_idx;
1303 char *saved_cmdlines;
1304 };
1305 static struct saved_cmdlines_buffer *savedcmd;
1306
1307 /* temporarily disable recording */
1308 static atomic_t trace_record_cmdline_disabled __read_mostly;
1309
1310 static inline char *get_saved_cmdlines(int idx)
1311 {
1312 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1313 }
1314
1315 static inline void set_cmdline(int idx, const char *cmdline)
1316 {
1317 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1318 }
1319
1320 static int allocate_cmdlines_buffer(unsigned int val,
1321 struct saved_cmdlines_buffer *s)
1322 {
1323 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1324 GFP_KERNEL);
1325 if (!s->map_cmdline_to_pid)
1326 return -ENOMEM;
1327
1328 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1329 if (!s->saved_cmdlines) {
1330 kfree(s->map_cmdline_to_pid);
1331 return -ENOMEM;
1332 }
1333
1334 s->cmdline_idx = 0;
1335 s->cmdline_num = val;
1336 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1337 sizeof(s->map_pid_to_cmdline));
1338 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1339 val * sizeof(*s->map_cmdline_to_pid));
1340
1341 return 0;
1342 }
1343
1344 static int trace_create_savedcmd(void)
1345 {
1346 int ret;
1347
1348 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1349 if (!savedcmd)
1350 return -ENOMEM;
1351
1352 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1353 if (ret < 0) {
1354 kfree(savedcmd);
1355 savedcmd = NULL;
1356 return -ENOMEM;
1357 }
1358
1359 return 0;
1360 }
1361
1362 int is_tracing_stopped(void)
1363 {
1364 return global_trace.stop_count;
1365 }
1366
1367 /**
1368 * tracing_start - quick start of the tracer
1369 *
1370 * If tracing is enabled but was stopped by tracing_stop,
1371 * this will start the tracer back up.
1372 */
1373 void tracing_start(void)
1374 {
1375 struct ring_buffer *buffer;
1376 unsigned long flags;
1377
1378 if (tracing_disabled)
1379 return;
1380
1381 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1382 if (--global_trace.stop_count) {
1383 if (global_trace.stop_count < 0) {
1384 /* Someone screwed up their debugging */
1385 WARN_ON_ONCE(1);
1386 global_trace.stop_count = 0;
1387 }
1388 goto out;
1389 }
1390
1391 /* Prevent the buffers from switching */
1392 arch_spin_lock(&global_trace.max_lock);
1393
1394 buffer = global_trace.trace_buffer.buffer;
1395 if (buffer)
1396 ring_buffer_record_enable(buffer);
1397
1398 #ifdef CONFIG_TRACER_MAX_TRACE
1399 buffer = global_trace.max_buffer.buffer;
1400 if (buffer)
1401 ring_buffer_record_enable(buffer);
1402 #endif
1403
1404 arch_spin_unlock(&global_trace.max_lock);
1405
1406 out:
1407 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1408 }
1409
1410 static void tracing_start_tr(struct trace_array *tr)
1411 {
1412 struct ring_buffer *buffer;
1413 unsigned long flags;
1414
1415 if (tracing_disabled)
1416 return;
1417
1418 /* If global, we need to also start the max tracer */
1419 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1420 return tracing_start();
1421
1422 raw_spin_lock_irqsave(&tr->start_lock, flags);
1423
1424 if (--tr->stop_count) {
1425 if (tr->stop_count < 0) {
1426 /* Someone screwed up their debugging */
1427 WARN_ON_ONCE(1);
1428 tr->stop_count = 0;
1429 }
1430 goto out;
1431 }
1432
1433 buffer = tr->trace_buffer.buffer;
1434 if (buffer)
1435 ring_buffer_record_enable(buffer);
1436
1437 out:
1438 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1439 }
1440
1441 /**
1442 * tracing_stop - quick stop of the tracer
1443 *
1444  * Lightweight way to stop tracing. Use in conjunction with
1445 * tracing_start.
1446 */
1447 void tracing_stop(void)
1448 {
1449 struct ring_buffer *buffer;
1450 unsigned long flags;
1451
1452 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1453 if (global_trace.stop_count++)
1454 goto out;
1455
1456 /* Prevent the buffers from switching */
1457 arch_spin_lock(&global_trace.max_lock);
1458
1459 buffer = global_trace.trace_buffer.buffer;
1460 if (buffer)
1461 ring_buffer_record_disable(buffer);
1462
1463 #ifdef CONFIG_TRACER_MAX_TRACE
1464 buffer = global_trace.max_buffer.buffer;
1465 if (buffer)
1466 ring_buffer_record_disable(buffer);
1467 #endif
1468
1469 arch_spin_unlock(&global_trace.max_lock);
1470
1471 out:
1472 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1473 }
1474
1475 static void tracing_stop_tr(struct trace_array *tr)
1476 {
1477 struct ring_buffer *buffer;
1478 unsigned long flags;
1479
1480 /* If global, we need to also stop the max tracer */
1481 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1482 return tracing_stop();
1483
1484 raw_spin_lock_irqsave(&tr->start_lock, flags);
1485 if (tr->stop_count++)
1486 goto out;
1487
1488 buffer = tr->trace_buffer.buffer;
1489 if (buffer)
1490 ring_buffer_record_disable(buffer);
1491
1492 out:
1493 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1494 }
1495
1496 void trace_stop_cmdline_recording(void);
1497
1498 static int trace_save_cmdline(struct task_struct *tsk)
1499 {
1500 unsigned pid, idx;
1501
1502 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1503 return 0;
1504
1505 /*
1506 * It's not the end of the world if we don't get
1507 * the lock, but we also don't want to spin
1508 * nor do we want to disable interrupts,
1509 * so if we miss here, then better luck next time.
1510 */
1511 if (!arch_spin_trylock(&trace_cmdline_lock))
1512 return 0;
1513
1514 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1515 if (idx == NO_CMDLINE_MAP) {
1516 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1517
1518 /*
1519 * Check whether the cmdline buffer at idx has a pid
1520 * mapped. We are going to overwrite that entry so we
1521 * need to clear the map_pid_to_cmdline. Otherwise we
1522 * would read the new comm for the old pid.
1523 */
1524 pid = savedcmd->map_cmdline_to_pid[idx];
1525 if (pid != NO_CMDLINE_MAP)
1526 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1527
1528 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1529 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1530
1531 savedcmd->cmdline_idx = idx;
1532 }
1533
1534 set_cmdline(idx, tsk->comm);
1535
1536 arch_spin_unlock(&trace_cmdline_lock);
1537
1538 return 1;
1539 }
1540
1541 static void __trace_find_cmdline(int pid, char comm[])
1542 {
1543 unsigned map;
1544
1545 if (!pid) {
1546 strcpy(comm, "<idle>");
1547 return;
1548 }
1549
1550 if (WARN_ON_ONCE(pid < 0)) {
1551 strcpy(comm, "<XXX>");
1552 return;
1553 }
1554
1555 if (pid > PID_MAX_DEFAULT) {
1556 strcpy(comm, "<...>");
1557 return;
1558 }
1559
1560 map = savedcmd->map_pid_to_cmdline[pid];
1561 if (map != NO_CMDLINE_MAP)
1562 strcpy(comm, get_saved_cmdlines(map));
1563 else
1564 strcpy(comm, "<...>");
1565 }
1566
1567 void trace_find_cmdline(int pid, char comm[])
1568 {
1569 preempt_disable();
1570 arch_spin_lock(&trace_cmdline_lock);
1571
1572 __trace_find_cmdline(pid, comm);
1573
1574 arch_spin_unlock(&trace_cmdline_lock);
1575 preempt_enable();
1576 }
1577
1578 void tracing_record_cmdline(struct task_struct *tsk)
1579 {
1580 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1581 return;
1582
1583 if (!__this_cpu_read(trace_cmdline_save))
1584 return;
1585
1586 if (trace_save_cmdline(tsk))
1587 __this_cpu_write(trace_cmdline_save, false);
1588 }
1589
1590 void
1591 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1592 int pc)
1593 {
1594 struct task_struct *tsk = current;
1595
1596 entry->preempt_count = pc & 0xff;
1597 entry->pid = (tsk) ? tsk->pid : 0;
1598 entry->flags =
1599 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1600 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1601 #else
1602 TRACE_FLAG_IRQS_NOSUPPORT |
1603 #endif
1604 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1605 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1606 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1607 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1608 }
1609 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1610
1611 struct ring_buffer_event *
1612 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1613 int type,
1614 unsigned long len,
1615 unsigned long flags, int pc)
1616 {
1617 struct ring_buffer_event *event;
1618
1619 event = ring_buffer_lock_reserve(buffer, len);
1620 if (event != NULL) {
1621 struct trace_entry *ent = ring_buffer_event_data(event);
1622
1623 tracing_generic_entry_update(ent, flags, pc);
1624 ent->type = type;
1625 }
1626
1627 return event;
1628 }
1629
1630 void
1631 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1632 {
1633 __this_cpu_write(trace_cmdline_save, true);
1634 ring_buffer_unlock_commit(buffer, event);
1635 }
1636
1637 static inline void
1638 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1639 struct ring_buffer_event *event,
1640 unsigned long flags, int pc)
1641 {
1642 __buffer_unlock_commit(buffer, event);
1643
1644 ftrace_trace_stack(buffer, flags, 6, pc);
1645 ftrace_trace_userstack(buffer, flags, pc);
1646 }
1647
1648 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1649 struct ring_buffer_event *event,
1650 unsigned long flags, int pc)
1651 {
1652 __trace_buffer_unlock_commit(buffer, event, flags, pc);
1653 }
1654 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1655
1656 static struct ring_buffer *temp_buffer;
1657
1658 struct ring_buffer_event *
1659 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1660 struct ftrace_event_file *ftrace_file,
1661 int type, unsigned long len,
1662 unsigned long flags, int pc)
1663 {
1664 struct ring_buffer_event *entry;
1665
1666 *current_rb = ftrace_file->tr->trace_buffer.buffer;
1667 entry = trace_buffer_lock_reserve(*current_rb,
1668 type, len, flags, pc);
1669 /*
1670 	 * If tracing is off, but we have triggers enabled,
1671 	 * we still need to look at the event data. Use the temp_buffer
1672 	 * to store the trace event for the trigger to use. It's recursion
1673 	 * safe and will not be recorded anywhere.
1674 */
1675 if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1676 *current_rb = temp_buffer;
1677 entry = trace_buffer_lock_reserve(*current_rb,
1678 type, len, flags, pc);
1679 }
1680 return entry;
1681 }
1682 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1683
1684 struct ring_buffer_event *
1685 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1686 int type, unsigned long len,
1687 unsigned long flags, int pc)
1688 {
1689 *current_rb = global_trace.trace_buffer.buffer;
1690 return trace_buffer_lock_reserve(*current_rb,
1691 type, len, flags, pc);
1692 }
1693 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1694
1695 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1696 struct ring_buffer_event *event,
1697 unsigned long flags, int pc)
1698 {
1699 __trace_buffer_unlock_commit(buffer, event, flags, pc);
1700 }
1701 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
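/*
 * Sketch of the reserve/fill/commit pattern these helpers implement
 * ("my_entry" and its type are illustrative placeholders):
 *
 *	event = trace_current_buffer_lock_reserve(&buffer, TRACE_PRINT,
 *						   sizeof(*my_entry),
 *						   irq_flags, pc);
 *	if (!event)
 *		return;
 *	my_entry = ring_buffer_event_data(event);
 *	... fill in the fields of *my_entry ...
 *	trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
 */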
1702
1703 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1704 struct ring_buffer_event *event,
1705 unsigned long flags, int pc,
1706 struct pt_regs *regs)
1707 {
1708 __buffer_unlock_commit(buffer, event);
1709
1710 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1711 ftrace_trace_userstack(buffer, flags, pc);
1712 }
1713 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1714
1715 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1716 struct ring_buffer_event *event)
1717 {
1718 ring_buffer_discard_commit(buffer, event);
1719 }
1720 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1721
1722 void
1723 trace_function(struct trace_array *tr,
1724 unsigned long ip, unsigned long parent_ip, unsigned long flags,
1725 int pc)
1726 {
1727 struct ftrace_event_call *call = &event_function;
1728 struct ring_buffer *buffer = tr->trace_buffer.buffer;
1729 struct ring_buffer_event *event;
1730 struct ftrace_entry *entry;
1731
1732 /* If we are reading the ring buffer, don't trace */
1733 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1734 return;
1735
1736 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1737 flags, pc);
1738 if (!event)
1739 return;
1740 entry = ring_buffer_event_data(event);
1741 entry->ip = ip;
1742 entry->parent_ip = parent_ip;
1743
1744 if (!call_filter_check_discard(call, entry, buffer, event))
1745 __buffer_unlock_commit(buffer, event);
1746 }
1747
1748 #ifdef CONFIG_STACKTRACE
1749
1750 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1751 struct ftrace_stack {
1752 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
1753 };
1754
1755 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1756 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1757
1758 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1759 unsigned long flags,
1760 int skip, int pc, struct pt_regs *regs)
1761 {
1762 struct ftrace_event_call *call = &event_kernel_stack;
1763 struct ring_buffer_event *event;
1764 struct stack_entry *entry;
1765 struct stack_trace trace;
1766 int use_stack;
1767 int size = FTRACE_STACK_ENTRIES;
1768
1769 trace.nr_entries = 0;
1770 trace.skip = skip;
1771
1772 /*
1773 	 * Since events can happen in NMIs, there's no safe way to
1774 	 * use the per-CPU ftrace_stacks. We reserve it, and if an interrupt
1775 * or NMI comes in, it will just have to use the default
1776 * FTRACE_STACK_SIZE.
1777 */
1778 preempt_disable_notrace();
1779
1780 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1781 /*
1782 * We don't need any atomic variables, just a barrier.
1783 * If an interrupt comes in, we don't care, because it would
1784 * have exited and put the counter back to what we want.
1785 * We just need a barrier to keep gcc from moving things
1786 * around.
1787 */
1788 barrier();
1789 if (use_stack == 1) {
1790 trace.entries = this_cpu_ptr(ftrace_stack.calls);
1791 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
1792
1793 if (regs)
1794 save_stack_trace_regs(regs, &trace);
1795 else
1796 save_stack_trace(&trace);
1797
1798 if (trace.nr_entries > size)
1799 size = trace.nr_entries;
1800 } else
1801 /* From now on, use_stack is a boolean */
1802 use_stack = 0;
1803
1804 size *= sizeof(unsigned long);
1805
1806 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1807 sizeof(*entry) + size, flags, pc);
1808 if (!event)
1809 goto out;
1810 entry = ring_buffer_event_data(event);
1811
1812 memset(&entry->caller, 0, size);
1813
1814 if (use_stack)
1815 memcpy(&entry->caller, trace.entries,
1816 trace.nr_entries * sizeof(unsigned long));
1817 else {
1818 trace.max_entries = FTRACE_STACK_ENTRIES;
1819 trace.entries = entry->caller;
1820 if (regs)
1821 save_stack_trace_regs(regs, &trace);
1822 else
1823 save_stack_trace(&trace);
1824 }
1825
1826 entry->size = trace.nr_entries;
1827
1828 if (!call_filter_check_discard(call, entry, buffer, event))
1829 __buffer_unlock_commit(buffer, event);
1830
1831 out:
1832 /* Again, don't let gcc optimize things here */
1833 barrier();
1834 __this_cpu_dec(ftrace_stack_reserve);
1835 preempt_enable_notrace();
1836
1837 }
1838
1839 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1840 int skip, int pc, struct pt_regs *regs)
1841 {
1842 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1843 return;
1844
1845 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1846 }
1847
1848 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1849 int skip, int pc)
1850 {
1851 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1852 return;
1853
1854 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1855 }
1856
1857 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1858 int pc)
1859 {
1860 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1861 }
1862
1863 /**
1864 * trace_dump_stack - record a stack back trace in the trace buffer
1865 * @skip: Number of functions to skip (helper handlers)
1866 */
1867 void trace_dump_stack(int skip)
1868 {
1869 unsigned long flags;
1870
1871 if (tracing_disabled || tracing_selftest_running)
1872 return;
1873
1874 local_save_flags(flags);
1875
1876 /*
1877 * Skip 3 more, seems to get us at the caller of
1878 * this function.
1879 */
1880 skip += 3;
1881 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1882 flags, skip, preempt_count(), NULL);
1883 }
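/*
 * Illustrative sketch (the condition is hypothetical): record a
 * backtrace into the trace buffer instead of the console when a
 * suspicious path is hit:
 *
 *	if (unlikely(suspicious_state))
 *		trace_dump_stack(0);
 */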
1884
1885 static DEFINE_PER_CPU(int, user_stack_count);
1886
1887 void
1888 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1889 {
1890 struct ftrace_event_call *call = &event_user_stack;
1891 struct ring_buffer_event *event;
1892 struct userstack_entry *entry;
1893 struct stack_trace trace;
1894
1895 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1896 return;
1897
1898 /*
1899 	 * NMIs cannot handle page faults, even with fixups.
1900 	 * Saving the user stack can (and often does) fault.
1901 */
1902 if (unlikely(in_nmi()))
1903 return;
1904
1905 /*
1906 * prevent recursion, since the user stack tracing may
1907 * trigger other kernel events.
1908 */
1909 preempt_disable();
1910 if (__this_cpu_read(user_stack_count))
1911 goto out;
1912
1913 __this_cpu_inc(user_stack_count);
1914
1915 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1916 sizeof(*entry), flags, pc);
1917 if (!event)
1918 goto out_drop_count;
1919 entry = ring_buffer_event_data(event);
1920
1921 entry->tgid = current->tgid;
1922 memset(&entry->caller, 0, sizeof(entry->caller));
1923
1924 trace.nr_entries = 0;
1925 trace.max_entries = FTRACE_STACK_ENTRIES;
1926 trace.skip = 0;
1927 trace.entries = entry->caller;
1928
1929 save_stack_trace_user(&trace);
1930 if (!call_filter_check_discard(call, entry, buffer, event))
1931 __buffer_unlock_commit(buffer, event);
1932
1933 out_drop_count:
1934 __this_cpu_dec(user_stack_count);
1935 out:
1936 preempt_enable();
1937 }
1938
1939 #ifdef UNUSED
1940 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1941 {
1942 ftrace_trace_userstack(tr, flags, preempt_count());
1943 }
1944 #endif /* UNUSED */
1945
1946 #endif /* CONFIG_STACKTRACE */
1947
1948 /* created for use with alloc_percpu */
1949 struct trace_buffer_struct {
1950 char buffer[TRACE_BUF_SIZE];
1951 };
1952
1953 static struct trace_buffer_struct *trace_percpu_buffer;
1954 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1955 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1956 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1957
1958 /*
1959  * The buffer used depends on the context. There is a per-CPU
1960  * buffer for normal context, softirq context, hard IRQ context and
1961  * NMI context. This allows for lockless recording.
1962  *
1963  * Note, if the buffers failed to be allocated, then this returns NULL.
1964 */
1965 static char *get_trace_buf(void)
1966 {
1967 struct trace_buffer_struct *percpu_buffer;
1968
1969 /*
1970 * If we have allocated per cpu buffers, then we do not
1971 * need to do any locking.
1972 */
1973 if (in_nmi())
1974 percpu_buffer = trace_percpu_nmi_buffer;
1975 else if (in_irq())
1976 percpu_buffer = trace_percpu_irq_buffer;
1977 else if (in_softirq())
1978 percpu_buffer = trace_percpu_sirq_buffer;
1979 else
1980 percpu_buffer = trace_percpu_buffer;
1981
1982 if (!percpu_buffer)
1983 return NULL;
1984
1985 return this_cpu_ptr(&percpu_buffer->buffer[0]);
1986 }
1987
1988 static int alloc_percpu_trace_buffer(void)
1989 {
1990 struct trace_buffer_struct *buffers;
1991 struct trace_buffer_struct *sirq_buffers;
1992 struct trace_buffer_struct *irq_buffers;
1993 struct trace_buffer_struct *nmi_buffers;
1994
1995 buffers = alloc_percpu(struct trace_buffer_struct);
1996 if (!buffers)
1997 goto err_warn;
1998
1999 sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2000 if (!sirq_buffers)
2001 goto err_sirq;
2002
2003 irq_buffers = alloc_percpu(struct trace_buffer_struct);
2004 if (!irq_buffers)
2005 goto err_irq;
2006
2007 nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2008 if (!nmi_buffers)
2009 goto err_nmi;
2010
2011 trace_percpu_buffer = buffers;
2012 trace_percpu_sirq_buffer = sirq_buffers;
2013 trace_percpu_irq_buffer = irq_buffers;
2014 trace_percpu_nmi_buffer = nmi_buffers;
2015
2016 return 0;
2017
2018 err_nmi:
2019 free_percpu(irq_buffers);
2020 err_irq:
2021 free_percpu(sirq_buffers);
2022 err_sirq:
2023 free_percpu(buffers);
2024 err_warn:
2025 WARN(1, "Could not allocate percpu trace_printk buffer");
2026 return -ENOMEM;
2027 }
2028
2029 static int buffers_allocated;
2030
2031 void trace_printk_init_buffers(void)
2032 {
2033 if (buffers_allocated)
2034 return;
2035
2036 if (alloc_percpu_trace_buffer())
2037 return;
2038
2039 /* trace_printk() is for debug use only. Don't use it in production. */
2040
2041 pr_warning("\n");
2042 pr_warning("**********************************************************\n");
2043 pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2044 pr_warning("** **\n");
2045 pr_warning("** trace_printk() being used. Allocating extra memory. **\n");
2046 pr_warning("** **\n");
2047 pr_warning("** This means that this is a DEBUG kernel and it is **\n");
2048 pr_warning("** unsafe for production use. **\n");
2049 pr_warning("** **\n");
2050 pr_warning("** If you see this message and you are not debugging **\n");
2051 pr_warning("** the kernel, report this immediately to your vendor! **\n");
2052 pr_warning("** **\n");
2053 pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2054 pr_warning("**********************************************************\n");
2055
2056 /* Expand the buffers to the set size */
2057 tracing_update_buffers();
2058
2059 buffers_allocated = 1;
2060
2061 /*
2062 * trace_printk_init_buffers() can be called by modules.
2063 * If that happens, then we need to start cmdline recording
2064 * directly here. If global_trace.buffer is already allocated,
2065 * then this was called by module code.
2066 */
2067 if (global_trace.trace_buffer.buffer)
2068 tracing_start_cmdline_record();
2069 }
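/*
 * Editor's illustrative sketch (not part of the kernel source): a module
 * only needs to use trace_printk(); the trace_printk infrastructure is
 * expected to call trace_printk_init_buffers() for it at load time, which
 * is why the function above must handle global_trace already being set up.
 * "example_module_init" is a hypothetical name used only here.
 */
static __maybe_unused int example_module_init(void)
{
	trace_printk("example module loaded\n");
	return 0;
}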
2070
2071 void trace_printk_start_comm(void)
2072 {
2073 /* Start tracing comms if trace printk is set */
2074 if (!buffers_allocated)
2075 return;
2076 tracing_start_cmdline_record();
2077 }
2078
2079 static void trace_printk_start_stop_comm(int enabled)
2080 {
2081 if (!buffers_allocated)
2082 return;
2083
2084 if (enabled)
2085 tracing_start_cmdline_record();
2086 else
2087 tracing_stop_cmdline_record();
2088 }
2089
2090 /**
2091 * trace_vbprintk - write a binary printk message into the tracing buffer
2092 * @ip: return address of the caller, @fmt: format string, @args: va_list of arguments for @fmt
2093 */
2094 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2095 {
2096 struct ftrace_event_call *call = &event_bprint;
2097 struct ring_buffer_event *event;
2098 struct ring_buffer *buffer;
2099 struct trace_array *tr = &global_trace;
2100 struct bprint_entry *entry;
2101 unsigned long flags;
2102 char *tbuffer;
2103 int len = 0, size, pc;
2104
2105 if (unlikely(tracing_selftest_running || tracing_disabled))
2106 return 0;
2107
2108 /* Don't pollute graph traces with trace_vprintk internals */
2109 pause_graph_tracing();
2110
2111 pc = preempt_count();
2112 preempt_disable_notrace();
2113
2114 tbuffer = get_trace_buf();
2115 if (!tbuffer) {
2116 len = 0;
2117 goto out;
2118 }
2119
2120 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2121
2122 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2123 goto out;
2124
2125 local_save_flags(flags);
2126 size = sizeof(*entry) + sizeof(u32) * len;
2127 buffer = tr->trace_buffer.buffer;
2128 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2129 flags, pc);
2130 if (!event)
2131 goto out;
2132 entry = ring_buffer_event_data(event);
2133 entry->ip = ip;
2134 entry->fmt = fmt;
2135
2136 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2137 if (!call_filter_check_discard(call, entry, buffer, event)) {
2138 __buffer_unlock_commit(buffer, event);
2139 ftrace_trace_stack(buffer, flags, 6, pc);
2140 }
2141
2142 out:
2143 preempt_enable_notrace();
2144 unpause_graph_tracing();
2145
2146 return len;
2147 }
2148 EXPORT_SYMBOL_GPL(trace_vbprintk);
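/*
 * Editor's illustrative sketch (not part of the kernel source): how a
 * printf-style wrapper would reach trace_vbprintk(). Note that only the
 * @fmt pointer is recorded in the ring buffer, so @fmt must stay valid
 * for as long as the trace does (normally a string literal).
 * "example_bprintk" is a hypothetical name used only here.
 */
static __maybe_unused int example_bprintk(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vbprintk(ip, fmt, ap);
	va_end(ap);
	return ret;
}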
2149
2150 static int
2151 __trace_array_vprintk(struct ring_buffer *buffer,
2152 unsigned long ip, const char *fmt, va_list args)
2153 {
2154 struct ftrace_event_call *call = &event_print;
2155 struct ring_buffer_event *event;
2156 int len = 0, size, pc;
2157 struct print_entry *entry;
2158 unsigned long flags;
2159 char *tbuffer;
2160
2161 if (tracing_disabled || tracing_selftest_running)
2162 return 0;
2163
2164 /* Don't pollute graph traces with trace_vprintk internals */
2165 pause_graph_tracing();
2166
2167 pc = preempt_count();
2168 preempt_disable_notrace();
2169
2170
2171 tbuffer = get_trace_buf();
2172 if (!tbuffer) {
2173 len = 0;
2174 goto out;
2175 }
2176
2177 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2178
2179 local_save_flags(flags);
2180 size = sizeof(*entry) + len + 1;
2181 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2182 flags, pc);
2183 if (!event)
2184 goto out;
2185 entry = ring_buffer_event_data(event);
2186 entry->ip = ip;
2187
2188 memcpy(&entry->buf, tbuffer, len + 1);
2189 if (!call_filter_check_discard(call, entry, buffer, event)) {
2190 __buffer_unlock_commit(buffer, event);
2191 ftrace_trace_stack(buffer, flags, 6, pc);
2192 }
2193 out:
2194 preempt_enable_notrace();
2195 unpause_graph_tracing();
2196
2197 return len;
2198 }
2199
2200 int trace_array_vprintk(struct trace_array *tr,
2201 unsigned long ip, const char *fmt, va_list args)
2202 {
2203 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2204 }
2205
2206 int trace_array_printk(struct trace_array *tr,
2207 unsigned long ip, const char *fmt, ...)
2208 {
2209 int ret;
2210 va_list ap;
2211
2212 if (!(trace_flags & TRACE_ITER_PRINTK))
2213 return 0;
2214
2215 va_start(ap, fmt);
2216 ret = trace_array_vprintk(tr, ip, fmt, ap);
2217 va_end(ap);
2218 return ret;
2219 }
2220
2221 int trace_array_printk_buf(struct ring_buffer *buffer,
2222 unsigned long ip, const char *fmt, ...)
2223 {
2224 int ret;
2225 va_list ap;
2226
2227 if (!(trace_flags & TRACE_ITER_PRINTK))
2228 return 0;
2229
2230 va_start(ap, fmt);
2231 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2232 va_end(ap);
2233 return ret;
2234 }
2235
2236 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2237 {
2238 return trace_array_vprintk(&global_trace, ip, fmt, args);
2239 }
2240 EXPORT_SYMBOL_GPL(trace_vprintk);
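/*
 * Editor's illustrative sketch (not part of the kernel source): writing
 * into a specific trace_array instance rather than the global buffer.
 * Nothing is recorded unless the "printk" trace option is set, as the
 * TRACE_ITER_PRINTK check above shows. "example_instance_log" is a
 * hypothetical name used only here.
 */
static __maybe_unused void example_instance_log(struct trace_array *tr, int value)
{
	trace_array_printk(tr, _THIS_IP_, "value is now %d\n", value);
}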
2241
2242 static void trace_iterator_increment(struct trace_iterator *iter)
2243 {
2244 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2245
2246 iter->idx++;
2247 if (buf_iter)
2248 ring_buffer_read(buf_iter, NULL);
2249 }
2250
2251 static struct trace_entry *
2252 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2253 unsigned long *lost_events)
2254 {
2255 struct ring_buffer_event *event;
2256 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2257
2258 if (buf_iter)
2259 event = ring_buffer_iter_peek(buf_iter, ts);
2260 else
2261 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2262 lost_events);
2263
2264 if (event) {
2265 iter->ent_size = ring_buffer_event_length(event);
2266 return ring_buffer_event_data(event);
2267 }
2268 iter->ent_size = 0;
2269 return NULL;
2270 }
2271
2272 static struct trace_entry *
2273 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2274 unsigned long *missing_events, u64 *ent_ts)
2275 {
2276 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2277 struct trace_entry *ent, *next = NULL;
2278 unsigned long lost_events = 0, next_lost = 0;
2279 int cpu_file = iter->cpu_file;
2280 u64 next_ts = 0, ts;
2281 int next_cpu = -1;
2282 int next_size = 0;
2283 int cpu;
2284
2285 /*
2286 * If we are in a per_cpu trace file, don't bother iterating over
2287 * all CPUs; peek at that CPU directly.
2288 */
2289 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2290 if (ring_buffer_empty_cpu(buffer, cpu_file))
2291 return NULL;
2292 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2293 if (ent_cpu)
2294 *ent_cpu = cpu_file;
2295
2296 return ent;
2297 }
2298
2299 for_each_tracing_cpu(cpu) {
2300
2301 if (ring_buffer_empty_cpu(buffer, cpu))
2302 continue;
2303
2304 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2305
2306 /*
2307 * Pick the entry with the smallest timestamp:
2308 */
2309 if (ent && (!next || ts < next_ts)) {
2310 next = ent;
2311 next_cpu = cpu;
2312 next_ts = ts;
2313 next_lost = lost_events;
2314 next_size = iter->ent_size;
2315 }
2316 }
2317
2318 iter->ent_size = next_size;
2319
2320 if (ent_cpu)
2321 *ent_cpu = next_cpu;
2322
2323 if (ent_ts)
2324 *ent_ts = next_ts;
2325
2326 if (missing_events)
2327 *missing_events = next_lost;
2328
2329 return next;
2330 }
2331
2332 /* Find the next real entry, without updating the iterator itself */
2333 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2334 int *ent_cpu, u64 *ent_ts)
2335 {
2336 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2337 }
2338
2339 /* Find the next real entry, and increment the iterator to the next entry */
2340 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2341 {
2342 iter->ent = __find_next_entry(iter, &iter->cpu,
2343 &iter->lost_events, &iter->ts);
2344
2345 if (iter->ent)
2346 trace_iterator_increment(iter);
2347
2348 return iter->ent ? iter : NULL;
2349 }
2350
2351 static void trace_consume(struct trace_iterator *iter)
2352 {
2353 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2354 &iter->lost_events);
2355 }
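/*
 * Editor's illustrative sketch (not part of the kernel source): the usual
 * pattern for draining a buffer with the iterator helpers above, similar
 * to what the trace_pipe read path does. print_trace_line() is assumed to
 * be declared in trace.h; the caller is assumed to hold the usual iterator
 * locks. "example_drain" is a hypothetical name used only here.
 */
static __maybe_unused void example_drain(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		/* format the entry that iter->ent now points to */
		print_trace_line(iter);
		/* consume it so it is not seen again */
		trace_consume(iter);
	}
}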
2356
2357 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2358 {
2359 struct trace_iterator *iter = m->private;
2360 int i = (int)*pos;
2361 void *ent;
2362
2363 WARN_ON_ONCE(iter->leftover);
2364
2365 (*pos)++;
2366
2367 /* can't go backwards */
2368 if (iter->idx > i)
2369 return NULL;
2370
2371 if (iter->idx < 0)
2372 ent = trace_find_next_entry_inc(iter);
2373 else
2374 ent = iter;
2375
2376 while (ent && iter->idx < i)
2377 ent = trace_find_next_entry_inc(iter);
2378
2379 iter->pos = *pos;
2380
2381 return ent;
2382 }
2383
2384 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2385 {
2386 struct ring_buffer_event *event;
2387 struct ring_buffer_iter *buf_iter;
2388 unsigned long entries = 0;
2389 u64 ts;
2390
2391 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2392
2393 buf_iter = trace_buffer_iter(iter, cpu);
2394 if (!buf_iter)
2395 return;
2396
2397 ring_buffer_iter_reset(buf_iter);
2398
2399 /*
2400 * With the max latency tracers, a reset may never have taken
2401 * place on a cpu. This is evident when the timestamp is before
2402 * the start of the buffer.
2403 */
2404 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2405 if (ts >= iter->trace_buffer->time_start)
2406 break;
2407 entries++;
2408 ring_buffer_read(buf_iter, NULL);
2409 }
2410
2411 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2412 }
2413
2414 /*
2415 * The current tracer is copied to avoid taking a global lock
2416 * all around.
2417 */
2418 static void *s_start(struct seq_file *m, loff_t *pos)
2419 {
2420 struct trace_iterator *iter = m->private;
2421 struct trace_array *tr = iter->tr;
2422 int cpu_file = iter->cpu_file;
2423 void *p = NULL;
2424 loff_t l = 0;
2425 int cpu;
2426
2427 /*
2428 * Copy the tracer to avoid using a global lock all around.
2429 * iter->trace is a copy of current_trace; the name pointer can
2430 * be compared instead of using strcmp(), as iter->trace->name
2431 * will point to the same string as current_trace->name.
2432 */
2433 mutex_lock(&trace_types_lock);
2434 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2435 *iter->trace = *tr->current_trace;
2436 mutex_unlock(&trace_types_lock);
2437
2438 #ifdef CONFIG_TRACER_MAX_TRACE
2439 if (iter->snapshot && iter->trace->use_max_tr)
2440 return ERR_PTR(-EBUSY);
2441 #endif
2442
2443 if (!iter->snapshot)
2444 atomic_inc(&trace_record_cmdline_disabled);
2445
2446 if (*pos != iter->pos) {
2447 iter->ent = NULL;
2448 iter->cpu = 0;
2449 iter->idx = -1;
2450
2451 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2452 for_each_tracing_cpu(cpu)
2453 tracing_iter_reset(iter, cpu);
2454 } else
2455 tracing_iter_reset(iter, cpu_file);
2456
2457 iter->leftover = 0;
2458 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2459 ;
2460
2461 } else {
2462 /*
2463 * If we overflowed the seq_file before, then we want
2464 * to just reuse the trace_seq buffer again.
2465 */
2466 if (iter->leftover)
2467 p = iter;
2468 else {
2469 l = *pos - 1;
2470 p = s_next(m, p, &l);
2471 }
2472 }
2473
2474 trace_event_read_lock();
2475 trace_access_lock(cpu_file);
2476 return p;
2477 }
2478
2479 static void s_stop(struct seq_file *m, void *p)
2480 {
2481 struct trace_iterator *iter = m->private;
2482
2483 #ifdef CONFIG_TRACER_MAX_TRACE
2484 if (iter->snapshot && iter->trace->use_max_tr)
2485 return;
2486 #endif
2487
2488 if (!iter->snapshot)
2489 atomic_dec(&trace_record_cmdline_disabled);
2490
2491 trace_access_unlock(iter->cpu_file);
2492 trace_event_read_unlock();
2493 }
2494
2495 static void
2496 get_total_entries(struct trace_buffer *buf,
2497 unsigned long *total, unsigned long *entries)
2498 {
2499 unsigned long count;
2500 int cpu;
2501
2502 *total = 0;
2503 *entries = 0;
2504
2505 for_each_tracing_cpu(cpu) {
2506 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2507 /*
2508 * If this buffer has skipped entries, then we hold all
2509 * entries for the trace and we need to ignore the
2510 * ones before the time stamp.
2511 */
2512 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2513 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2514 /* total is the same as the entries */
2515 *total += count;
2516 } else
2517 *total += count +
2518 ring_buffer_overrun_cpu(buf->buffer, cpu);
2519 *entries += count;
2520 }
2521 }
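/*
 * Editor's illustrative sketch (not part of the kernel source): since
 * "total" also counts events lost to ring-buffer overruns while "entries"
 * only counts what is still held for the trace, the difference is the
 * number of dropped events. "example_lost_events" is a hypothetical name
 * used only here.
 */
static __maybe_unused unsigned long example_lost_events(struct trace_buffer *buf)
{
	unsigned long total, entries;

	get_total_entries(buf, &total, &entries);
	return total - entries;
}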
2522
2523 static void print_lat_help_header(struct seq_file *m)
2524 {
2525 seq_puts(m, "# _------=> CPU# \n"
2526 "# / _-----=> irqs-off \n"
2527 "# | / _----=> need-resched \n"
2528 "# || / _---=> hardirq/softirq \n"
2529 "# ||| / _--=> preempt-depth \n"
2530 "# |||| / delay \n"
2531 "# cmd pid ||||| time | caller \n"
2532 "# \\ / ||||| \\ | / \n");
2533 }
2534
2535 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2536 {
2537 unsigned long total;
2538 unsigned long entries;
2539
2540 get_total_entries(buf, &total, &entries);
2541 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
2542 entries, total, num_online_cpus());
2543 seq_puts(m, "#\n");
2544 }
2545
2546 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2547 {
2548 print_event_info(buf, m);
2549 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
2550 "# | | | | |\n");
2551 }
2552
2553 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2554 {
2555 print_event_info(buf, m);
2556 seq_puts(m, "# _-----=> irqs-off\n"
2557 "# / _----=> need-resched\n"
2558 "# | / _---=> hardirq/softirq\n"
2559 "# || / _--=> preempt-depth\n"
2560 "# ||| / delay\n"
2561 "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
2562 "# | | | |||| | |\n");
2563 }
2564
2565 void
2566 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2567 {
2568 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2569 struct trace_buffer *buf = iter->trace_buffer;
2570 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2571 struct tracer *type = iter->trace;
2572 unsigned long entries;
2573 unsigned long total;
2574 const char *name = "preemption";
2575
2576 name = type->name;
2577
2578 get_total_entries(buf, &total, &entries);
2579
2580 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2581 name, UTS_RELEASE);
2582 seq_puts(m, "# -----------------------------------"
2583 "---------------------------------\n");
2584 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2585 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2586 nsecs_to_usecs(data->saved_latency),
2587 entries,
2588 total,
2589 buf->cpu,
2590 #if defined(CONFIG_PREEMPT_NONE)
2591 "server",
2592 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2593 "desktop",
2594 #elif defined(CONFIG_PREEMPT)
2595 "preempt",
2596 #else
2597 "unknown",
2598 #endif
2599 /* These are reserved for later use */
2600 0, 0, 0, 0);
2601 #ifdef CONFIG_SMP
2602 seq_printf(m, " #P:%d)\n", num_online_cpus());
2603 #else
2604 seq_puts(m, ")\n");
2605 #endif
2606 seq_puts(m, "# -----------------\n");
2607 seq_printf(m, "# | task: %.16s-%d "
2608 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2609 data->comm, data->pid,
2610 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2611 data->policy, data->rt_priority);
2612 seq_puts(m, "# -----------------\n");
2613
2614 if (data->critical_start) {
2615 seq_puts(m, "# => started at: ");
2616 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2617 trace_print_seq(m, &iter->seq);
2618 seq_puts(m, "\n# => ended at: ");
2619 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2620 trace_print_seq(m, &iter->seq);
2621 seq_puts(m, "\n#\n");
2622 }
2623
2624 seq_puts(m, "#\n");
2625 }
2626
2627 static void test_cpu_buff_start(struct trace_iterator *iter)
2628 {
2629 struct trace_seq *s = &iter->seq;
2630
2631 if (!(trace_flags & TRACE_ITER_ANNOTATE))
2632 return;
2633
2634 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2635 return;
2636
2637 if (cpumask_test_cpu(iter->cpu, iter->started))
2638 return;
2639
2640 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2641 return;
2642
2643 cpumask_set_cpu(iter->cpu, iter->started);
2644
2645 /* Don't print started cpu buffer for the first entry of the trace */
2646 if (iter->idx > 1)
2647 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2648 iter->cpu);
2649 }
2650
2651 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2652 {
2653 struct trace_seq *s = &iter->seq;
2654 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2655 struct trace_entry *entry;
2656 struct trace_event *event;
2657
2658 entry = iter->ent;
2659
2660 test_cpu_buff_start(iter);
2661
2662 event = ftrace_find_event(entry->type);
2663
2664 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2665 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2666 trace_print_lat_context(iter);
2667 else
2668 trace_print_context(iter);
2669 }
2670
2671 if (trace_seq_has_overflowed(s))
2672 return TRACE_TYPE_PARTIAL_LINE;
2673
2674 if (event)
2675 return event->funcs->trace(iter, sym_flags, event);
2676
2677 trace_seq_printf(s, "Unknown type %d\n", entry->type);
2678
2679 return trace_handle_return(s);
2680 }
2681
2682 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2683 {
2684 struct trace_seq *s = &iter->seq;
2685 struct trace_entry *entry;
2686 struct trace_event *event;
2687
2688 entry = iter->ent;
2689
2690 if (trace_flags & TRACE_ITER_CONTEXT_INFO)
2691 trace_seq_printf(s, "%d %d %llu ",
2692 entry->pid, iter->cpu, iter->ts);
2693
2694 if (trace_seq_has_overflowed(s))
2695 return TRACE_TYPE_PARTIAL_LINE;
2696
2697 event = ftrace_find_event(entry->type);
2698 if (event)
2699 return event->funcs->raw(iter, 0, event);
2700
2701 trace_seq_printf(s, "%d ?\n", entry->type);
2702
2703 return trace_handle_return(s);
2704 }
2705
2706 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2707 {
2708 struct trace_seq *s = &iter->seq;
2709 unsigned char newline = '\n';
2710 struct trace_entry *entry;
2711 struct trace_event *event;
2712
2713 entry = iter->ent;
2714
2715 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2716 SEQ_PUT_HEX_FIELD(s, entry->pid);
2717 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2718 SEQ_PUT_HEX_FIELD(s, iter->ts);
2719 if (trace_seq_has_overflowed(s))
2720 return TRACE_TYPE_PARTIAL_LINE;
2721 }
2722
2723 event = ftrace_find_event(entry->type);
2724 if (event) {
2725 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2726 if (ret != TRACE_TYPE_HANDLED)
2727 return ret;
2728 }
2729
2730 SEQ_PUT_FIELD(s, newline);
2731
2732 return trace_handle_return(s);
2733 }
2734
2735 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2736 {
2737 struct trace_seq *s = &iter->seq;
2738 struct trace_entry *entry;
2739 struct trace_event *event;
2740
2741 entry = iter->ent;
2742
2743 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2744 SEQ_PUT_FIELD(s, entry->pid);
2745 SEQ_PUT_FIELD(s, iter->cpu);
2746 SEQ_PUT_FIELD(s, iter->ts);
2747 if (trace_seq_has_overflowed(s))
2748 return TRACE_TYPE_PARTIAL_LINE;
2749 }
2750
2751 event = ftrace_find_event(entry->type);
2752 return event ? event->funcs->binary(iter, 0, event) :
2753 TRACE_TYPE_HANDLED;
2754 }
2755
2756 int trace_empty(struct trace_iterator *iter)
2757 {
2758 struct ring_buffer_iter *buf_iter;
2759 int cpu;
2760
2761 /* If we are looking at one CPU buffer, only check that one */
2762 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2763 cpu = iter->cpu_file;
2764 buf_iter = trace_buffer_iter(iter, cpu);
2765 if (buf_iter) {
2766 if (!ring_buffer_iter_empty(buf_iter))
2767 return 0;
2768 } else {
2769 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2770 return 0;
2771 }
2772 return 1;
2773 }
2774
2775 for_each_tracing_cpu(cpu) {
2776 buf_iter = trace_buffer_iter(iter, cpu);
2777 if (buf_iter) {
2778 if (!ring_buffer_iter_empty(buf_iter))
2779 return 0;
2780 } else {
2781 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2782 return 0;
2783 }
2784 }
2785
2786 return 1;
2787 }
2788
2789 /* Called with trace_event_read_lock() held. */
2790 enum print_line_t print_trace_line(struct trace_iterator *iter)
2791 {
2792 enum print_line_t ret;
2793
2794 if (iter->lost_events) {
2795 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2796 iter->cpu, iter->lost_events);
2797 if (trace_seq_has_overflowed(&iter->seq))
2798 return TRACE_TYPE_PARTIAL_LINE;
2799 }
2800
2801 if (iter->trace && iter->trace->print_line) {
2802 ret = iter->trace->print_line(iter);
2803 if (ret != TRACE_TYPE_UNHANDLED)
2804 return ret;
2805 }
2806
2807 if (iter->ent->type == TRACE_BPUTS &&
2808 trace_flags & TRACE_ITER_PRINTK &&
2809 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2810 return trace_print_bputs_msg_only(iter);
2811
2812 if (iter->ent->type == TRACE_BPRINT &&
2813 trace_flags & TRACE_ITER_PRINTK &&
2814 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2815 return trace_print_bprintk_msg_only(iter);
2816
2817 if (iter->ent->type == TRACE_PRINT &&
2818 trace_flags & TRACE_ITER_PRINTK &&
2819 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2820 return trace_print_printk_msg_only(iter);
2821
2822 if (trace_flags & TRACE_ITER_BIN)
2823 return print_bin_fmt(iter);
2824
2825 if (trace_flags & TRACE_ITER_HEX)
2826 return print_hex_fmt(iter);
2827
2828 if (trace_flags & TRACE_ITER_RAW)
2829 return print_raw_fmt(iter);
2830
2831 return print_trace_fmt(iter);
2832 }
2833
2834 void trace_latency_header(struct seq_file *m)
2835 {
2836 struct trace_iterator *iter = m->private;
2837
2838 /* print nothing if the buffers are empty */
2839 if (trace_empty(iter))
2840 return;
2841
2842 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2843 print_trace_header(m, iter);
2844
2845 if (!(trace_flags & TRACE_ITER_VERBOSE))
2846 print_lat_help_header(m);
2847 }
2848
2849 void trace_default_header(struct seq_file *m)
2850 {
2851 struct trace_iterator *iter = m->private;
2852
2853 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2854 return;
2855
2856 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2857 /* print nothing if the buffers are empty */
2858 if (trace_empty(iter))
2859 return;
2860 print_trace_header(m, iter);
2861 if (!(trace_flags & TRACE_ITER_VERBOSE))
2862 print_lat_help_header(m);
2863 } else {
2864 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2865 if (trace_flags & TRACE_ITER_IRQ_INFO)
2866 print_func_help_header_irq(iter->trace_buffer, m);
2867 else
2868 print_func_help_header(iter->trace_buffer, m);
2869 }
2870 }
2871 }
2872
2873 static void test_ftrace_alive(struct seq_file *m)
2874 {
2875 if (!ftrace_is_dead())
2876 return;
2877 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2878 "# MAY BE MISSING FUNCTION EVENTS\n");
2879 }
2880
2881 #ifdef CONFIG_TRACER_MAX_TRACE
2882 static void show_snapshot_main_help(struct seq_file *m)
2883 {
2884 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2885 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2886 "# Takes a snapshot of the main buffer.\n"
2887 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2888 "# (Doesn't have to be '2' works with any number that\n"
2889 "# is not a '0' or '1')\n");
2890 }
2891
2892 static void show_snapshot_percpu_help(struct seq_file *m)
2893 {
2894 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2895 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2896 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2897 "# Takes a snapshot of the main buffer for this cpu.\n");
2898 #else
2899 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2900 "# Must use main snapshot file to allocate.\n");
2901 #endif
2902 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2903 "# (Doesn't have to be '2' works with any number that\n"
2904 "# is not a '0' or '1')\n");
2905 }
2906
2907 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2908 {
2909 if (iter->tr->allocated_snapshot)
2910 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2911 else
2912 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2913
2914 seq_puts(m, "# Snapshot commands:\n");
2915 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2916 show_snapshot_main_help(m);
2917 else
2918 show_snapshot_percpu_help(m);
2919 }
2920 #else
2921 /* Should never be called */
2922 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2923 #endif
2924
2925 static int s_show(struct seq_file *m, void *v)
2926 {
2927 struct trace_iterator *iter = v;
2928 int ret;
2929
2930 if (iter->ent == NULL) {
2931 if (iter->tr) {
2932 seq_printf(m, "# tracer: %s\n", iter->trace->name);
2933 seq_puts(m, "#\n");
2934 test_ftrace_alive(m);
2935 }
2936 if (iter->snapshot && trace_empty(iter))
2937 print_snapshot_help(m, iter);
2938 else if (iter->trace && iter->trace->print_header)
2939 iter->trace->print_header(m);
2940 else
2941 trace_default_header(m);
2942
2943 } else if (iter->leftover) {
2944 /*
2945 * If we filled the seq_file buffer earlier, we
2946 * want to just show it now.
2947 */
2948 ret = trace_print_seq(m, &iter->seq);
2949
2950 /* ret should this time be zero, but you never know */
2951 iter->leftover = ret;
2952
2953 } else {
2954 print_trace_line(iter);
2955 ret = trace_print_seq(m, &iter->seq);
2956 /*
2957 * If we overflow the seq_file buffer, then it will
2958 * ask us for this data again at start up.
2959 * Use that instead.
2960 * ret is 0 if seq_file write succeeded.
2961 * -1 otherwise.
2962 */
2963 iter->leftover = ret;
2964 }
2965
2966 return 0;
2967 }
2968
2969 /*
2970 * Should be used after trace_array_get(), trace_types_lock
2971 * ensures that i_cdev was already initialized.
2972 */
2973 static inline int tracing_get_cpu(struct inode *inode)
2974 {
2975 if (inode->i_cdev) /* See trace_create_cpu_file() */
2976 return (long)inode->i_cdev - 1;
2977 return RING_BUFFER_ALL_CPUS;
2978 }
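/*
 * Editor's illustrative sketch (not part of the kernel source): the
 * counterpart of tracing_get_cpu() is assumed to stash "cpu + 1" in
 * i_cdev when the per-cpu files are created, so that a NULL i_cdev can
 * keep meaning RING_BUFFER_ALL_CPUS. "example_set_cpu" is a hypothetical
 * name used only here.
 */
static __maybe_unused void example_set_cpu(struct inode *inode, long cpu)
{
	inode->i_cdev = (void *)(cpu + 1);
}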
2979
2980 static const struct seq_operations tracer_seq_ops = {
2981 .start = s_start,
2982 .next = s_next,
2983 .stop = s_stop,
2984 .show = s_show,
2985 };
2986
2987 static struct trace_iterator *
2988 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2989 {
2990 struct trace_array *tr = inode->i_private;
2991 struct trace_iterator *iter;
2992 int cpu;
2993
2994 if (tracing_disabled)
2995 return ERR_PTR(-ENODEV);
2996
2997 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2998 if (!iter)
2999 return ERR_PTR(-ENOMEM);
3000
3001 iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
3002 GFP_KERNEL);
3003 if (!iter->buffer_iter)
3004 goto release;
3005
3006 /*
3007 * We make a copy of the current tracer to avoid concurrent
3008 * changes on it while we are reading.
3009 */
3010 mutex_lock(&trace_types_lock);
3011 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3012 if (!iter->trace)
3013 goto fail;
3014
3015 *iter->trace = *tr->current_trace;
3016
3017 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3018 goto fail;
3019
3020 iter->tr = tr;
3021
3022 #ifdef CONFIG_TRACER_MAX_TRACE
3023 /* Currently only the top directory has a snapshot */
3024 if (tr->current_trace->print_max || snapshot)
3025 iter->trace_buffer = &tr->max_buffer;
3026 else
3027 #endif
3028 iter->trace_buffer = &tr->trace_buffer;
3029 iter->snapshot = snapshot;
3030 iter->pos = -1;
3031 iter->cpu_file = tracing_get_cpu(inode);
3032 mutex_init(&iter->mutex);
3033
3034 /* Notify the tracer early, before we stop tracing. */
3035 if (iter->trace && iter->trace->open)
3036 iter->trace->open(iter);
3037
3038 /* Annotate start of buffers if we had overruns */
3039 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3040 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3041
3042 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3043 if (trace_clocks[tr->clock_id].in_ns)
3044 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3045
3046 /* stop the trace while dumping if we are not opening "snapshot" */
3047 if (!iter->snapshot)
3048 tracing_stop_tr(tr);
3049
3050 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3051 for_each_tracing_cpu(cpu) {
3052 iter->buffer_iter[cpu] =
3053 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3054 }
3055 ring_buffer_read_prepare_sync();
3056 for_each_tracing_cpu(cpu) {
3057 ring_buffer_read_start(iter->buffer_iter[cpu]);
3058 tracing_iter_reset(iter, cpu);
3059 }
3060 } else {
3061 cpu = iter->cpu_file;
3062 iter->buffer_iter[cpu] =
3063 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3064 ring_buffer_read_prepare_sync();
3065 ring_buffer_read_start(iter->buffer_iter[cpu]);
3066 tracing_iter_reset(iter, cpu);
3067 }
3068
3069 mutex_unlock(&trace_types_lock);
3070
3071 return iter;
3072
3073 fail:
3074 mutex_unlock(&trace_types_lock);
3075 kfree(iter->trace);
3076 kfree(iter->buffer_iter);
3077 release:
3078 seq_release_private(inode, file);
3079 return ERR_PTR(-ENOMEM);
3080 }
3081
3082 int tracing_open_generic(struct inode *inode, struct file *filp)
3083 {
3084 if (tracing_disabled)
3085 return -ENODEV;
3086
3087 filp->private_data = inode->i_private;
3088 return 0;
3089 }
3090
3091 bool tracing_is_disabled(void)
3092 {
3093 return tracing_disabled ? true : false;
3094 }
3095
3096 /*
3097 * Open and update trace_array ref count.
3098 * Must have the current trace_array passed to it.
3099 */
3100 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3101 {
3102 struct trace_array *tr = inode->i_private;
3103
3104 if (tracing_disabled)
3105 return -ENODEV;
3106
3107 if (trace_array_get(tr) < 0)
3108 return -ENODEV;
3109
3110 filp->private_data = inode->i_private;
3111
3112 return 0;
3113 }
3114
3115 static int tracing_release(struct inode *inode, struct file *file)
3116 {
3117 struct trace_array *tr = inode->i_private;
3118 struct seq_file *m = file->private_data;
3119 struct trace_iterator *iter;
3120 int cpu;
3121
3122 if (!(file->f_mode & FMODE_READ)) {
3123 trace_array_put(tr);
3124 return 0;
3125 }
3126
3127 /* Writes do not use seq_file */
3128 iter = m->private;
3129 mutex_lock(&trace_types_lock);
3130
3131 for_each_tracing_cpu(cpu) {
3132 if (iter->buffer_iter[cpu])
3133 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3134 }
3135
3136 if (iter->trace && iter->trace->close)
3137 iter->trace->close(iter);
3138
3139 if (!iter->snapshot)
3140 /* reenable tracing if it was previously enabled */
3141 tracing_start_tr(tr);
3142
3143 __trace_array_put(tr);
3144
3145 mutex_unlock(&trace_types_lock);
3146
3147 mutex_destroy(&iter->mutex);
3148 free_cpumask_var(iter->started);
3149 kfree(iter->trace);
3150 kfree(iter->buffer_iter);
3151 seq_release_private(inode, file);
3152
3153 return 0;
3154 }
3155
3156 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3157 {
3158 struct trace_array *tr = inode->i_private;
3159
3160 trace_array_put(tr);
3161 return 0;
3162 }
3163
3164 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3165 {
3166 struct trace_array *tr = inode->i_private;
3167
3168 trace_array_put(tr);
3169
3170 return single_release(inode, file);
3171 }
3172
3173 static int tracing_open(struct inode *inode, struct file *file)
3174 {
3175 struct trace_array *tr = inode->i_private;
3176 struct trace_iterator *iter;
3177 int ret = 0;
3178
3179 if (trace_array_get(tr) < 0)
3180 return -ENODEV;
3181
3182 /* If this file was open for write, then erase contents */
3183 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3184 int cpu = tracing_get_cpu(inode);
3185
3186 if (cpu == RING_BUFFER_ALL_CPUS)
3187 tracing_reset_online_cpus(&tr->trace_buffer);
3188 else
3189 tracing_reset(&tr->trace_buffer, cpu);
3190 }
3191
3192 if (file->f_mode & FMODE_READ) {
3193 iter = __tracing_open(inode, file, false);
3194 if (IS_ERR(iter))
3195 ret = PTR_ERR(iter);
3196 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3197 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3198 }
3199
3200 if (ret < 0)
3201 trace_array_put(tr);
3202
3203 return ret;
3204 }
3205
3206 /*
3207 * Some tracers are not suitable for instance buffers.
3208 * A tracer is always available for the global array (toplevel),
3209 * or for an instance if it explicitly states that it is.
3210 */
3211 static bool
3212 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3213 {
3214 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3215 }
3216
3217 /* Find the next tracer that this trace array may use */
3218 static struct tracer *
3219 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3220 {
3221 while (t && !trace_ok_for_array(t, tr))
3222 t = t->next;
3223
3224 return t;
3225 }
3226
3227 static void *
3228 t_next(struct seq_file *m, void *v, loff_t *pos)
3229 {
3230 struct trace_array *tr = m->private;
3231 struct tracer *t = v;
3232
3233 (*pos)++;
3234
3235 if (t)
3236 t = get_tracer_for_array(tr, t->next);
3237
3238 return t;
3239 }
3240
3241 static void *t_start(struct seq_file *m, loff_t *pos)
3242 {
3243 struct trace_array *tr = m->private;
3244 struct tracer *t;
3245 loff_t l = 0;
3246
3247 mutex_lock(&trace_types_lock);
3248
3249 t = get_tracer_for_array(tr, trace_types);
3250 for (; t && l < *pos; t = t_next(m, t, &l))
3251 ;
3252
3253 return t;
3254 }
3255
3256 static void t_stop(struct seq_file *m, void *p)
3257 {
3258 mutex_unlock(&trace_types_lock);
3259 }
3260
3261 static int t_show(struct seq_file *m, void *v)
3262 {
3263 struct tracer *t = v;
3264
3265 if (!t)
3266 return 0;
3267
3268 seq_puts(m, t->name);
3269 if (t->next)
3270 seq_putc(m, ' ');
3271 else
3272 seq_putc(m, '\n');
3273
3274 return 0;
3275 }
3276
3277 static const struct seq_operations show_traces_seq_ops = {
3278 .start = t_start,
3279 .next = t_next,
3280 .stop = t_stop,
3281 .show = t_show,
3282 };
3283
3284 static int show_traces_open(struct inode *inode, struct file *file)
3285 {
3286 struct trace_array *tr = inode->i_private;
3287 struct seq_file *m;
3288 int ret;
3289
3290 if (tracing_disabled)
3291 return -ENODEV;
3292
3293 ret = seq_open(file, &show_traces_seq_ops);
3294 if (ret)
3295 return ret;
3296
3297 m = file->private_data;
3298 m->private = tr;
3299
3300 return 0;
3301 }
3302
3303 static ssize_t
3304 tracing_write_stub(struct file *filp, const char __user *ubuf,
3305 size_t count, loff_t *ppos)
3306 {
3307 return count;
3308 }
3309
3310 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3311 {
3312 int ret;
3313
3314 if (file->f_mode & FMODE_READ)
3315 ret = seq_lseek(file, offset, whence);
3316 else
3317 file->f_pos = ret = 0;
3318
3319 return ret;
3320 }
3321
3322 static const struct file_operations tracing_fops = {
3323 .open = tracing_open,
3324 .read = seq_read,
3325 .write = tracing_write_stub,
3326 .llseek = tracing_lseek,
3327 .release = tracing_release,
3328 };
3329
3330 static const struct file_operations show_traces_fops = {
3331 .open = show_traces_open,
3332 .read = seq_read,
3333 .release = seq_release,
3334 .llseek = seq_lseek,
3335 };
3336
3337 /*
3338 * The tracer itself will not take this lock, but still we want
3339 * to provide a consistent cpumask to user-space:
3340 */
3341 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3342
3343 /*
3344 * Temporary storage for the character representation of the
3345 * CPU bitmask (and one more byte for the newline):
3346 */
3347 static char mask_str[NR_CPUS + 1];
3348
3349 static ssize_t
3350 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3351 size_t count, loff_t *ppos)
3352 {
3353 struct trace_array *tr = file_inode(filp)->i_private;
3354 int len;
3355
3356 mutex_lock(&tracing_cpumask_update_lock);
3357
3358 len = snprintf(mask_str, count, "%*pb\n",
3359 cpumask_pr_args(tr->tracing_cpumask));
3360 if (len >= count) {
3361 count = -EINVAL;
3362 goto out_err;
3363 }
3364 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3365
3366 out_err:
3367 mutex_unlock(&tracing_cpumask_update_lock);
3368
3369 return count;
3370 }
3371
3372 static ssize_t
3373 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3374 size_t count, loff_t *ppos)
3375 {
3376 struct trace_array *tr = file_inode(filp)->i_private;
3377 cpumask_var_t tracing_cpumask_new;
3378 int err, cpu;
3379
3380 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3381 return -ENOMEM;
3382
3383 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3384 if (err)
3385 goto err_unlock;
3386
3387 mutex_lock(&tracing_cpumask_update_lock);
3388
3389 local_irq_disable();
3390 arch_spin_lock(&tr->max_lock);
3391 for_each_tracing_cpu(cpu) {
3392 /*
3393 * Increase/decrease the disabled counter if we are
3394 * about to flip a bit in the cpumask:
3395 */
3396 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3397 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3398 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3399 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3400 }
3401 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3402 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3403 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3404 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3405 }
3406 }
3407 arch_spin_unlock(&tr->max_lock);
3408 local_irq_enable();
3409
3410 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3411
3412 mutex_unlock(&tracing_cpumask_update_lock);
3413 free_cpumask_var(tracing_cpumask_new);
3414
3415 return count;
3416
3417 err_unlock:
3418 free_cpumask_var(tracing_cpumask_new);
3419
3420 return err;
3421 }
3422
3423 static const struct file_operations tracing_cpumask_fops = {
3424 .open = tracing_open_generic_tr,
3425 .read = tracing_cpumask_read,
3426 .write = tracing_cpumask_write,
3427 .release = tracing_release_generic_tr,
3428 .llseek = generic_file_llseek,
3429 };
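/*
 * Editor's usage note (not part of the kernel source): from user space
 * the file takes and reports a hex CPU mask, e.g.
 *
 *	# echo 3 > tracing_cpumask	(trace only CPUs 0 and 1)
 *	# cat tracing_cpumask
 *
 * Clearing a bit pauses recording on that CPU via the
 * ring_buffer_record_disable_cpu() calls above.
 */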
3430
3431 static int tracing_trace_options_show(struct seq_file *m, void *v)
3432 {
3433 struct tracer_opt *trace_opts;
3434 struct trace_array *tr = m->private;
3435 u32 tracer_flags;
3436 int i;
3437
3438 mutex_lock(&trace_types_lock);
3439 tracer_flags = tr->current_trace->flags->val;
3440 trace_opts = tr->current_trace->flags->opts;
3441
3442 for (i = 0; trace_options[i]; i++) {
3443 if (trace_flags & (1 << i))
3444 seq_printf(m, "%s\n", trace_options[i]);
3445 else
3446 seq_printf(m, "no%s\n", trace_options[i]);
3447 }
3448
3449 for (i = 0; trace_opts[i].name; i++) {
3450 if (tracer_flags & trace_opts[i].bit)
3451 seq_printf(m, "%s\n", trace_opts[i].name);
3452 else
3453 seq_printf(m, "no%s\n", trace_opts[i].name);
3454 }
3455 mutex_unlock(&trace_types_lock);
3456
3457 return 0;
3458 }
3459
3460 static int __set_tracer_option(struct trace_array *tr,
3461 struct tracer_flags *tracer_flags,
3462 struct tracer_opt *opts, int neg)
3463 {
3464 struct tracer *trace = tr->current_trace;
3465 int ret;
3466
3467 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3468 if (ret)
3469 return ret;
3470
3471 if (neg)
3472 tracer_flags->val &= ~opts->bit;
3473 else
3474 tracer_flags->val |= opts->bit;
3475 return 0;
3476 }
3477
3478 /* Try to assign a tracer specific option */
3479 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3480 {
3481 struct tracer *trace = tr->current_trace;
3482 struct tracer_flags *tracer_flags = trace->flags;
3483 struct tracer_opt *opts = NULL;
3484 int i;
3485
3486 for (i = 0; tracer_flags->opts[i].name; i++) {
3487 opts = &tracer_flags->opts[i];
3488
3489 if (strcmp(cmp, opts->name) == 0)
3490 return __set_tracer_option(tr, trace->flags, opts, neg);
3491 }
3492
3493 return -EINVAL;
3494 }
3495
3496 /* Some tracers require overwrite to stay enabled */
3497 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3498 {
3499 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3500 return -1;
3501
3502 return 0;
3503 }
3504
3505 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3506 {
3507 /* do nothing if flag is already set */
3508 if (!!(trace_flags & mask) == !!enabled)
3509 return 0;
3510
3511 /* Give the tracer a chance to approve the change */
3512 if (tr->current_trace->flag_changed)
3513 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3514 return -EINVAL;
3515
3516 if (enabled)
3517 trace_flags |= mask;
3518 else
3519 trace_flags &= ~mask;
3520
3521 if (mask == TRACE_ITER_RECORD_CMD)
3522 trace_event_enable_cmd_record(enabled);
3523
3524 if (mask == TRACE_ITER_OVERWRITE) {
3525 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3526 #ifdef CONFIG_TRACER_MAX_TRACE
3527 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3528 #endif
3529 }
3530
3531 if (mask == TRACE_ITER_PRINTK)
3532 trace_printk_start_stop_comm(enabled);
3533
3534 return 0;
3535 }
3536
3537 static int trace_set_options(struct trace_array *tr, char *option)
3538 {
3539 char *cmp;
3540 int neg = 0;
3541 int ret = -ENODEV;
3542 int i;
3543
3544 cmp = strstrip(option);
3545
3546 if (strncmp(cmp, "no", 2) == 0) {
3547 neg = 1;
3548 cmp += 2;
3549 }
3550
3551 mutex_lock(&trace_types_lock);
3552
3553 for (i = 0; trace_options[i]; i++) {
3554 if (strcmp(cmp, trace_options[i]) == 0) {
3555 ret = set_tracer_flag(tr, 1 << i, !neg);
3556 break;
3557 }
3558 }
3559
3560 /* If no option could be set, test the specific tracer options */
3561 if (!trace_options[i])
3562 ret = set_tracer_option(tr, cmp, neg);
3563
3564 mutex_unlock(&trace_types_lock);
3565
3566 return ret;
3567 }
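/*
 * Editor's illustrative sketch (not part of the kernel source): how an
 * option string maps onto the flag helpers above. "nooverwrite" is
 * stripped of its "no" prefix and ends up as
 * set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0); an unknown name falls
 * through to the tracer-specific options. "example_disable_overwrite" is
 * a hypothetical name used only here.
 */
static __maybe_unused int example_disable_overwrite(struct trace_array *tr)
{
	/* writable copy: trace_set_options() strips the string in place */
	char opt[] = "nooverwrite";

	return trace_set_options(tr, opt);
}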
3568
3569 static ssize_t
3570 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3571 size_t cnt, loff_t *ppos)
3572 {
3573 struct seq_file *m = filp->private_data;
3574 struct trace_array *tr = m->private;
3575 char buf[64];
3576 int ret;
3577
3578 if (cnt >= sizeof(buf))
3579 return -EINVAL;
3580
3581 if (copy_from_user(&buf, ubuf, cnt))
3582 return -EFAULT;
3583
3584 buf[cnt] = 0;
3585
3586 ret = trace_set_options(tr, buf);
3587 if (ret < 0)
3588 return ret;
3589
3590 *ppos += cnt;
3591
3592 return cnt;
3593 }
3594
3595 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3596 {
3597 struct trace_array *tr = inode->i_private;
3598 int ret;
3599
3600 if (tracing_disabled)
3601 return -ENODEV;
3602
3603 if (trace_array_get(tr) < 0)
3604 return -ENODEV;
3605
3606 ret = single_open(file, tracing_trace_options_show, inode->i_private);
3607 if (ret < 0)
3608 trace_array_put(tr);
3609
3610 return ret;
3611 }
3612
3613 static const struct file_operations tracing_iter_fops = {
3614 .open = tracing_trace_options_open,
3615 .read = seq_read,
3616 .llseek = seq_lseek,
3617 .release = tracing_single_release_tr,
3618 .write = tracing_trace_options_write,
3619 };
3620
3621 static const char readme_msg[] =
3622 "tracing mini-HOWTO:\n\n"
3623 "# echo 0 > tracing_on : quick way to disable tracing\n"
3624 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3625 " Important files:\n"
3626 " trace\t\t\t- The static contents of the buffer\n"
3627 "\t\t\t To clear the buffer write into this file: echo > trace\n"
3628 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3629 " current_tracer\t- function and latency tracers\n"
3630 " available_tracers\t- list of configured tracers for current_tracer\n"
3631 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
3632 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
3633 " trace_clock\t\t-change the clock used to order events\n"
3634 " local: Per cpu clock but may not be synced across CPUs\n"
3635 " global: Synced across CPUs but slows tracing down.\n"
3636 " counter: Not a clock, but just an increment\n"
3637 " uptime: Jiffy counter from time of boot\n"
3638 " perf: Same clock that perf events use\n"
3639 #ifdef CONFIG_X86_64
3640 " x86-tsc: TSC cycle counter\n"
3641 #endif
3642 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3643 " tracing_cpumask\t- Limit which CPUs to trace\n"
3644 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3645 "\t\t\t Remove sub-buffer with rmdir\n"
3646 " trace_options\t\t- Set format or modify how tracing happens\n"
3647 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
3648 "\t\t\t option name\n"
3649 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3650 #ifdef CONFIG_DYNAMIC_FTRACE
3651 "\n available_filter_functions - list of functions that can be filtered on\n"
3652 " set_ftrace_filter\t- echo function name in here to only trace these\n"
3653 "\t\t\t functions\n"
3654 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3655 "\t modules: Can select a group via module\n"
3656 "\t Format: :mod:<module-name>\n"
3657 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
3658 "\t triggers: a command to perform when function is hit\n"
3659 "\t Format: <function>:<trigger>[:count]\n"
3660 "\t trigger: traceon, traceoff\n"
3661 "\t\t enable_event:<system>:<event>\n"
3662 "\t\t disable_event:<system>:<event>\n"
3663 #ifdef CONFIG_STACKTRACE
3664 "\t\t stacktrace\n"
3665 #endif
3666 #ifdef CONFIG_TRACER_SNAPSHOT
3667 "\t\t snapshot\n"
3668 #endif
3669 "\t\t dump\n"
3670 "\t\t cpudump\n"
3671 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
3672 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
3673 "\t The first one will disable tracing every time do_fault is hit\n"
3674 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
3675 "\t The first time do trap is hit and it disables tracing, the\n"
3676 "\t counter will decrement to 2. If tracing is already disabled,\n"
3677 "\t the counter will not decrement. It only decrements when the\n"
3678 "\t trigger did work\n"
3679 "\t To remove trigger without count:\n"
3680 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
3681 "\t To remove trigger with a count:\n"
3682 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3683 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
3684 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3685 "\t modules: Can select a group via module command :mod:\n"
3686 "\t Does not accept triggers\n"
3687 #endif /* CONFIG_DYNAMIC_FTRACE */
3688 #ifdef CONFIG_FUNCTION_TRACER
3689 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3690 "\t\t (function)\n"
3691 #endif
3692 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3693 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3694 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3695 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3696 #endif
3697 #ifdef CONFIG_TRACER_SNAPSHOT
3698 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
3699 "\t\t\t snapshot buffer. Read the contents for more\n"
3700 "\t\t\t information\n"
3701 #endif
3702 #ifdef CONFIG_STACK_TRACER
3703 " stack_trace\t\t- Shows the max stack trace when active\n"
3704 " stack_max_size\t- Shows current max stack size that was traced\n"
3705 "\t\t\t Write into this file to reset the max size (trigger a\n"
3706 "\t\t\t new trace)\n"
3707 #ifdef CONFIG_DYNAMIC_FTRACE
3708 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3709 "\t\t\t traces\n"
3710 #endif
3711 #endif /* CONFIG_STACK_TRACER */
3712 " events/\t\t- Directory containing all trace event subsystems:\n"
3713 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3714 " events/<system>/\t- Directory containing all trace events for <system>:\n"
3715 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3716 "\t\t\t events\n"
3717 " filter\t\t- If set, only events passing filter are traced\n"
3718 " events/<system>/<event>/\t- Directory containing control files for\n"
3719 "\t\t\t <event>:\n"
3720 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3721 " filter\t\t- If set, only events passing filter are traced\n"
3722 " trigger\t\t- If set, a command to perform when event is hit\n"
3723 "\t Format: <trigger>[:count][if <filter>]\n"
3724 "\t trigger: traceon, traceoff\n"
3725 "\t enable_event:<system>:<event>\n"
3726 "\t disable_event:<system>:<event>\n"
3727 #ifdef CONFIG_STACKTRACE
3728 "\t\t stacktrace\n"
3729 #endif
3730 #ifdef CONFIG_TRACER_SNAPSHOT
3731 "\t\t snapshot\n"
3732 #endif
3733 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
3734 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
3735 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3736 "\t events/block/block_unplug/trigger\n"
3737 "\t The first disables tracing every time block_unplug is hit.\n"
3738 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
3739 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
3740 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3741 "\t Like function triggers, the counter is only decremented if it\n"
3742 "\t enabled or disabled tracing.\n"
3743 "\t To remove a trigger without a count:\n"
3744 "\t echo '!<trigger> > <system>/<event>/trigger\n"
3745 "\t To remove a trigger with a count:\n"
3746 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
3747 "\t Filters can be ignored when removing a trigger.\n"
3748 ;
3749
3750 static ssize_t
3751 tracing_readme_read(struct file *filp, char __user *ubuf,
3752 size_t cnt, loff_t *ppos)
3753 {
3754 return simple_read_from_buffer(ubuf, cnt, ppos,
3755 readme_msg, strlen(readme_msg));
3756 }
3757
3758 static const struct file_operations tracing_readme_fops = {
3759 .open = tracing_open_generic,
3760 .read = tracing_readme_read,
3761 .llseek = generic_file_llseek,
3762 };
3763
3764 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3765 {
3766 unsigned int *ptr = v;
3767
3768 if (*pos || m->count)
3769 ptr++;
3770
3771 (*pos)++;
3772
3773 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3774 ptr++) {
3775 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3776 continue;
3777
3778 return ptr;
3779 }
3780
3781 return NULL;
3782 }
3783
3784 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3785 {
3786 void *v;
3787 loff_t l = 0;
3788
3789 preempt_disable();
3790 arch_spin_lock(&trace_cmdline_lock);
3791
3792 v = &savedcmd->map_cmdline_to_pid[0];
3793 while (l <= *pos) {
3794 v = saved_cmdlines_next(m, v, &l);
3795 if (!v)
3796 return NULL;
3797 }
3798
3799 return v;
3800 }
3801
3802 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3803 {
3804 arch_spin_unlock(&trace_cmdline_lock);
3805 preempt_enable();
3806 }
3807
3808 static int saved_cmdlines_show(struct seq_file *m, void *v)
3809 {
3810 char buf[TASK_COMM_LEN];
3811 unsigned int *pid = v;
3812
3813 __trace_find_cmdline(*pid, buf);
3814 seq_printf(m, "%d %s\n", *pid, buf);
3815 return 0;
3816 }
3817
3818 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3819 .start = saved_cmdlines_start,
3820 .next = saved_cmdlines_next,
3821 .stop = saved_cmdlines_stop,
3822 .show = saved_cmdlines_show,
3823 };
3824
3825 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3826 {
3827 if (tracing_disabled)
3828 return -ENODEV;
3829
3830 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3831 }
3832
3833 static const struct file_operations tracing_saved_cmdlines_fops = {
3834 .open = tracing_saved_cmdlines_open,
3835 .read = seq_read,
3836 .llseek = seq_lseek,
3837 .release = seq_release,
3838 };
3839
3840 static ssize_t
3841 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3842 size_t cnt, loff_t *ppos)
3843 {
3844 char buf[64];
3845 int r;
3846
3847 arch_spin_lock(&trace_cmdline_lock);
3848 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3849 arch_spin_unlock(&trace_cmdline_lock);
3850
3851 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3852 }
3853
3854 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3855 {
3856 kfree(s->saved_cmdlines);
3857 kfree(s->map_cmdline_to_pid);
3858 kfree(s);
3859 }
3860
3861 static int tracing_resize_saved_cmdlines(unsigned int val)
3862 {
3863 struct saved_cmdlines_buffer *s, *savedcmd_temp;
3864
3865 s = kmalloc(sizeof(*s), GFP_KERNEL);
3866 if (!s)
3867 return -ENOMEM;
3868
3869 if (allocate_cmdlines_buffer(val, s) < 0) {
3870 kfree(s);
3871 return -ENOMEM;
3872 }
3873
3874 arch_spin_lock(&trace_cmdline_lock);
3875 savedcmd_temp = savedcmd;
3876 savedcmd = s;
3877 arch_spin_unlock(&trace_cmdline_lock);
3878 free_saved_cmdlines_buffer(savedcmd_temp);
3879
3880 return 0;
3881 }
3882
3883 static ssize_t
3884 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3885 size_t cnt, loff_t *ppos)
3886 {
3887 unsigned long val;
3888 int ret;
3889
3890 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3891 if (ret)
3892 return ret;
3893
3894 /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
3895 if (!val || val > PID_MAX_DEFAULT)
3896 return -EINVAL;
3897
3898 ret = tracing_resize_saved_cmdlines((unsigned int)val);
3899 if (ret < 0)
3900 return ret;
3901
3902 *ppos += cnt;
3903
3904 return cnt;
3905 }
3906
3907 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3908 .open = tracing_open_generic,
3909 .read = tracing_saved_cmdlines_size_read,
3910 .write = tracing_saved_cmdlines_size_write,
3911 };
3912
3913 static ssize_t
3914 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3915 size_t cnt, loff_t *ppos)
3916 {
3917 struct trace_array *tr = filp->private_data;
3918 char buf[MAX_TRACER_SIZE+2];
3919 int r;
3920
3921 mutex_lock(&trace_types_lock);
3922 r = sprintf(buf, "%s\n", tr->current_trace->name);
3923 mutex_unlock(&trace_types_lock);
3924
3925 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3926 }
3927
3928 int tracer_init(struct tracer *t, struct trace_array *tr)
3929 {
3930 tracing_reset_online_cpus(&tr->trace_buffer);
3931 return t->init(tr);
3932 }
3933
3934 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3935 {
3936 int cpu;
3937
3938 for_each_tracing_cpu(cpu)
3939 per_cpu_ptr(buf->data, cpu)->entries = val;
3940 }
3941
3942 #ifdef CONFIG_TRACER_MAX_TRACE
3943 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3944 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3945 struct trace_buffer *size_buf, int cpu_id)
3946 {
3947 int cpu, ret = 0;
3948
3949 if (cpu_id == RING_BUFFER_ALL_CPUS) {
3950 for_each_tracing_cpu(cpu) {
3951 ret = ring_buffer_resize(trace_buf->buffer,
3952 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3953 if (ret < 0)
3954 break;
3955 per_cpu_ptr(trace_buf->data, cpu)->entries =
3956 per_cpu_ptr(size_buf->data, cpu)->entries;
3957 }
3958 } else {
3959 ret = ring_buffer_resize(trace_buf->buffer,
3960 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3961 if (ret == 0)
3962 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3963 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3964 }
3965
3966 return ret;
3967 }
3968 #endif /* CONFIG_TRACER_MAX_TRACE */
3969
3970 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3971 unsigned long size, int cpu)
3972 {
3973 int ret;
3974
3975 /*
3976 * If the kernel or the user changes the size of the ring buffer,
3977 * we use the size that was given, and we can forget about
3978 * expanding it later.
3979 */
3980 ring_buffer_expanded = true;
3981
3982 /* May be called before buffers are initialized */
3983 if (!tr->trace_buffer.buffer)
3984 return 0;
3985
3986 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3987 if (ret < 0)
3988 return ret;
3989
3990 #ifdef CONFIG_TRACER_MAX_TRACE
3991 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3992 !tr->current_trace->use_max_tr)
3993 goto out;
3994
3995 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3996 if (ret < 0) {
3997 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3998 &tr->trace_buffer, cpu);
3999 if (r < 0) {
4000 /*
4001 * AARGH! We are left with a max buffer of a
4002 * different size!
4003 * The max buffer is our "snapshot" buffer.
4004 * When a tracer needs a snapshot (one of the
4005 * latency tracers), it swaps the max buffer
4006 * with the saved snapshot. We succeeded in
4007 * updating the size of the main buffer, but failed
4008 * to update the size of the max buffer. And when we
4009 * tried to reset the main buffer to its original
4010 * size, we failed there too. This is very unlikely to
4011 * happen, but if it does, warn and kill all
4012 * tracing.
4013 */
4014 WARN_ON(1);
4015 tracing_disabled = 1;
4016 }
4017 return ret;
4018 }
4019
4020 if (cpu == RING_BUFFER_ALL_CPUS)
4021 set_buffer_entries(&tr->max_buffer, size);
4022 else
4023 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4024
4025 out:
4026 #endif /* CONFIG_TRACER_MAX_TRACE */
4027
4028 if (cpu == RING_BUFFER_ALL_CPUS)
4029 set_buffer_entries(&tr->trace_buffer, size);
4030 else
4031 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4032
4033 return ret;
4034 }
4035
4036 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4037 unsigned long size, int cpu_id)
4038 {
4039 int ret = size;
4040
4041 mutex_lock(&trace_types_lock);
4042
4043 if (cpu_id != RING_BUFFER_ALL_CPUS) {
4044 /* make sure this cpu is enabled in the mask */
4045 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4046 ret = -EINVAL;
4047 goto out;
4048 }
4049 }
4050
4051 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4052 if (ret < 0)
4053 ret = -ENOMEM;
4054
4055 out:
4056 mutex_unlock(&trace_types_lock);
4057
4058 return ret;
4059 }
4060
4061
4062 /**
4063 * tracing_update_buffers - used by tracing facility to expand ring buffers
4064 *
4065 * To save memory when tracing is never used on a system that has it
4066 * configured in, the ring buffers are set to a minimum size. Once
4067 * a user starts to use the tracing facility, they need to grow
4068 * to their default size.
4069 *
4070 * This function is to be called when a tracer is about to be used.
4071 */
4072 int tracing_update_buffers(void)
4073 {
4074 int ret = 0;
4075
4076 mutex_lock(&trace_types_lock);
4077 if (!ring_buffer_expanded)
4078 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4079 RING_BUFFER_ALL_CPUS);
4080 mutex_unlock(&trace_types_lock);
4081
4082 return ret;
4083 }
4084
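/*
 * Illustrative sketch (not part of this file): a typical caller expands
 * the buffers before it starts using tracing and bails out on failure:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... safe to enable the tracer or event now ...
 */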
4085 struct trace_option_dentry;
4086
4087 static struct trace_option_dentry *
4088 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4089
4090 static void
4091 destroy_trace_option_files(struct trace_option_dentry *topts);
4092
4093 /*
4094 * Used to clear out the tracer before deletion of an instance.
4095 * Must have trace_types_lock held.
4096 */
4097 static void tracing_set_nop(struct trace_array *tr)
4098 {
4099 if (tr->current_trace == &nop_trace)
4100 return;
4101
4102 tr->current_trace->enabled--;
4103
4104 if (tr->current_trace->reset)
4105 tr->current_trace->reset(tr);
4106
4107 tr->current_trace = &nop_trace;
4108 }
4109
4110 static void update_tracer_options(struct trace_array *tr, struct tracer *t)
4111 {
4112 static struct trace_option_dentry *topts;
4113
4114 /* Only enable if the directory has been created already. */
4115 if (!tr->dir)
4116 return;
4117
4118 /* Currently, only the top instance has options */
4119 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL))
4120 return;
4121
4122 destroy_trace_option_files(topts);
4123 topts = create_trace_option_files(tr, t);
4124 }
4125
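/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it has
 * not been expanded yet, tear down the old tracer (freeing its snapshot
 * buffer when the new tracer does not need one), then initialize and
 * enable the new tracer. Fails with -EBUSY while trace_pipe readers
 * hold a reference on the current tracer.
 */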
4126 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4127 {
4128 struct tracer *t;
4129 #ifdef CONFIG_TRACER_MAX_TRACE
4130 bool had_max_tr;
4131 #endif
4132 int ret = 0;
4133
4134 mutex_lock(&trace_types_lock);
4135
4136 if (!ring_buffer_expanded) {
4137 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4138 RING_BUFFER_ALL_CPUS);
4139 if (ret < 0)
4140 goto out;
4141 ret = 0;
4142 }
4143
4144 for (t = trace_types; t; t = t->next) {
4145 if (strcmp(t->name, buf) == 0)
4146 break;
4147 }
4148 if (!t) {
4149 ret = -EINVAL;
4150 goto out;
4151 }
4152 if (t == tr->current_trace)
4153 goto out;
4154
4155 /* Some tracers are only allowed for the top level buffer */
4156 if (!trace_ok_for_array(t, tr)) {
4157 ret = -EINVAL;
4158 goto out;
4159 }
4160
4161 /* If trace pipe files are being read, we can't change the tracer */
4162 if (tr->current_trace->ref) {
4163 ret = -EBUSY;
4164 goto out;
4165 }
4166
4167 trace_branch_disable();
4168
4169 tr->current_trace->enabled--;
4170
4171 if (tr->current_trace->reset)
4172 tr->current_trace->reset(tr);
4173
4174 /* Current trace needs to be nop_trace before synchronize_sched */
4175 tr->current_trace = &nop_trace;
4176
4177 #ifdef CONFIG_TRACER_MAX_TRACE
4178 had_max_tr = tr->allocated_snapshot;
4179
4180 if (had_max_tr && !t->use_max_tr) {
4181 /*
4182 * We need to make sure that the update_max_tr sees that
4183 * current_trace changed to nop_trace to keep it from
4184 * swapping the buffers after we resize it.
4185 * The update_max_tr is called with interrupts disabled,
4186 * so a synchronize_sched() is sufficient.
4187 */
4188 synchronize_sched();
4189 free_snapshot(tr);
4190 }
4191 #endif
4192 update_tracer_options(tr, t);
4193
4194 #ifdef CONFIG_TRACER_MAX_TRACE
4195 if (t->use_max_tr && !had_max_tr) {
4196 ret = alloc_snapshot(tr);
4197 if (ret < 0)
4198 goto out;
4199 }
4200 #endif
4201
4202 if (t->init) {
4203 ret = tracer_init(t, tr);
4204 if (ret)
4205 goto out;
4206 }
4207
4208 tr->current_trace = t;
4209 tr->current_trace->enabled++;
4210 trace_branch_enable(tr);
4211 out:
4212 mutex_unlock(&trace_types_lock);
4213
4214 return ret;
4215 }
4216
4217 static ssize_t
4218 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4219 size_t cnt, loff_t *ppos)
4220 {
4221 struct trace_array *tr = filp->private_data;
4222 char buf[MAX_TRACER_SIZE+1];
4223 int i;
4224 size_t ret;
4225 int err;
4226
4227 ret = cnt;
4228
4229 if (cnt > MAX_TRACER_SIZE)
4230 cnt = MAX_TRACER_SIZE;
4231
4232 if (copy_from_user(&buf, ubuf, cnt))
4233 return -EFAULT;
4234
4235 buf[cnt] = 0;
4236
4237 /* strip trailing whitespace. */
4238 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4239 buf[i] = 0;
4240
4241 err = tracing_set_tracer(tr, buf);
4242 if (err)
4243 return err;
4244
4245 *ppos += ret;
4246
4247 return ret;
4248 }
4249
4250 static ssize_t
4251 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4252 size_t cnt, loff_t *ppos)
4253 {
4254 char buf[64];
4255 int r;
4256
4257 r = snprintf(buf, sizeof(buf), "%ld\n",
4258 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4259 if (r > sizeof(buf))
4260 r = sizeof(buf);
4261 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4262 }
4263
4264 static ssize_t
4265 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4266 size_t cnt, loff_t *ppos)
4267 {
4268 unsigned long val;
4269 int ret;
4270
4271 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4272 if (ret)
4273 return ret;
4274
4275 *ptr = val * 1000;
4276
4277 return cnt;
4278 }
4279
4280 static ssize_t
4281 tracing_thresh_read(struct file *filp, char __user *ubuf,
4282 size_t cnt, loff_t *ppos)
4283 {
4284 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4285 }
4286
4287 static ssize_t
4288 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4289 size_t cnt, loff_t *ppos)
4290 {
4291 struct trace_array *tr = filp->private_data;
4292 int ret;
4293
4294 mutex_lock(&trace_types_lock);
4295 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4296 if (ret < 0)
4297 goto out;
4298
4299 if (tr->current_trace->update_thresh) {
4300 ret = tr->current_trace->update_thresh(tr);
4301 if (ret < 0)
4302 goto out;
4303 }
4304
4305 ret = cnt;
4306 out:
4307 mutex_unlock(&trace_types_lock);
4308
4309 return ret;
4310 }
4311
4312 static ssize_t
4313 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4314 size_t cnt, loff_t *ppos)
4315 {
4316 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4317 }
4318
4319 static ssize_t
4320 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4321 size_t cnt, loff_t *ppos)
4322 {
4323 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4324 }
4325
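/*
 * Open a consuming reader on trace_pipe: allocate a trace_iterator for
 * the current tracer and take a reference on the tracer so it cannot be
 * switched while the pipe is open.
 */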
4326 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4327 {
4328 struct trace_array *tr = inode->i_private;
4329 struct trace_iterator *iter;
4330 int ret = 0;
4331
4332 if (tracing_disabled)
4333 return -ENODEV;
4334
4335 if (trace_array_get(tr) < 0)
4336 return -ENODEV;
4337
4338 mutex_lock(&trace_types_lock);
4339
4340 /* create a buffer to store the information to pass to userspace */
4341 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4342 if (!iter) {
4343 ret = -ENOMEM;
4344 __trace_array_put(tr);
4345 goto out;
4346 }
4347
4348 trace_seq_init(&iter->seq);
4349 iter->trace = tr->current_trace;
4350
4351 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4352 ret = -ENOMEM;
4353 goto fail;
4354 }
4355
4356 /* trace pipe does not show start of buffer */
4357 cpumask_setall(iter->started);
4358
4359 if (trace_flags & TRACE_ITER_LATENCY_FMT)
4360 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4361
4362 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4363 if (trace_clocks[tr->clock_id].in_ns)
4364 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4365
4366 iter->tr = tr;
4367 iter->trace_buffer = &tr->trace_buffer;
4368 iter->cpu_file = tracing_get_cpu(inode);
4369 mutex_init(&iter->mutex);
4370 filp->private_data = iter;
4371
4372 if (iter->trace->pipe_open)
4373 iter->trace->pipe_open(iter);
4374
4375 nonseekable_open(inode, filp);
4376
4377 tr->current_trace->ref++;
4378 out:
4379 mutex_unlock(&trace_types_lock);
4380 return ret;
4381
4382 fail:
4383 kfree(iter->trace);
4384 kfree(iter);
4385 __trace_array_put(tr);
4386 mutex_unlock(&trace_types_lock);
4387 return ret;
4388 }
4389
4390 static int tracing_release_pipe(struct inode *inode, struct file *file)
4391 {
4392 struct trace_iterator *iter = file->private_data;
4393 struct trace_array *tr = inode->i_private;
4394
4395 mutex_lock(&trace_types_lock);
4396
4397 tr->current_trace->ref--;
4398
4399 if (iter->trace->pipe_close)
4400 iter->trace->pipe_close(iter);
4401
4402 mutex_unlock(&trace_types_lock);
4403
4404 free_cpumask_var(iter->started);
4405 mutex_destroy(&iter->mutex);
4406 kfree(iter);
4407
4408 trace_array_put(tr);
4409
4410 return 0;
4411 }
4412
4413 static unsigned int
4414 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4415 {
4416 /* Iterators are static, they should be filled or empty */
4417 if (trace_buffer_iter(iter, iter->cpu_file))
4418 return POLLIN | POLLRDNORM;
4419
4420 if (trace_flags & TRACE_ITER_BLOCK)
4421 /*
4422 * Always select as readable when in blocking mode
4423 */
4424 return POLLIN | POLLRDNORM;
4425 else
4426 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4427 filp, poll_table);
4428 }
4429
4430 static unsigned int
4431 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4432 {
4433 struct trace_iterator *iter = filp->private_data;
4434
4435 return trace_poll(iter, filp, poll_table);
4436 }
4437
4438 /* Must be called with iter->mutex held. */
4439 static int tracing_wait_pipe(struct file *filp)
4440 {
4441 struct trace_iterator *iter = filp->private_data;
4442 int ret;
4443
4444 while (trace_empty(iter)) {
4445
4446 if ((filp->f_flags & O_NONBLOCK)) {
4447 return -EAGAIN;
4448 }
4449
4450 /*
4451 * We block until we read something and tracing is disabled.
4452 * We still block if tracing is disabled, but we have never
4453 * read anything. This allows a user to cat this file, and
4454 * then enable tracing. But after we have read something,
4455 * we give an EOF when tracing is again disabled.
4456 *
4457 * iter->pos will be 0 if we haven't read anything.
4458 */
4459 if (!tracing_is_on() && iter->pos)
4460 break;
4461
4462 mutex_unlock(&iter->mutex);
4463
4464 ret = wait_on_pipe(iter, false);
4465
4466 mutex_lock(&iter->mutex);
4467
4468 if (ret)
4469 return ret;
4470 }
4471
4472 return 1;
4473 }
4474
4475 /*
4476 * Consumer reader.
4477 */
4478 static ssize_t
4479 tracing_read_pipe(struct file *filp, char __user *ubuf,
4480 size_t cnt, loff_t *ppos)
4481 {
4482 struct trace_iterator *iter = filp->private_data;
4483 ssize_t sret;
4484
4485 /* return any leftover data */
4486 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4487 if (sret != -EBUSY)
4488 return sret;
4489
4490 trace_seq_init(&iter->seq);
4491
4492 /*
4493 * Avoid more than one consumer on a single file descriptor.
4494 * This is just a matter of trace coherency; the ring buffer itself
4495 * is protected.
4496 */
4497 mutex_lock(&iter->mutex);
4498 if (iter->trace->read) {
4499 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4500 if (sret)
4501 goto out;
4502 }
4503
4504 waitagain:
4505 sret = tracing_wait_pipe(filp);
4506 if (sret <= 0)
4507 goto out;
4508
4509 /* stop when tracing is finished */
4510 if (trace_empty(iter)) {
4511 sret = 0;
4512 goto out;
4513 }
4514
4515 if (cnt >= PAGE_SIZE)
4516 cnt = PAGE_SIZE - 1;
4517
4518 /* reset all but tr, trace, and overruns */
4519 memset(&iter->seq, 0,
4520 sizeof(struct trace_iterator) -
4521 offsetof(struct trace_iterator, seq));
4522 cpumask_clear(iter->started);
4523 iter->pos = -1;
4524
4525 trace_event_read_lock();
4526 trace_access_lock(iter->cpu_file);
4527 while (trace_find_next_entry_inc(iter) != NULL) {
4528 enum print_line_t ret;
4529 int save_len = iter->seq.seq.len;
4530
4531 ret = print_trace_line(iter);
4532 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4533 /* don't print partial lines */
4534 iter->seq.seq.len = save_len;
4535 break;
4536 }
4537 if (ret != TRACE_TYPE_NO_CONSUME)
4538 trace_consume(iter);
4539
4540 if (trace_seq_used(&iter->seq) >= cnt)
4541 break;
4542
4543 /*
4544 * Setting the full flag means we reached the trace_seq buffer
4545 * size and we should have left via the partial-output condition
4546 * above. One of the trace_seq_* functions is not being used properly.
4547 */
4548 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4549 iter->ent->type);
4550 }
4551 trace_access_unlock(iter->cpu_file);
4552 trace_event_read_unlock();
4553
4554 /* Now copy what we have to the user */
4555 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4556 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4557 trace_seq_init(&iter->seq);
4558
4559 /*
4560 * If there was nothing to send to user, in spite of consuming trace
4561 * entries, go back to wait for more entries.
4562 */
4563 if (sret == -EBUSY)
4564 goto waitagain;
4565
4566 out:
4567 mutex_unlock(&iter->mutex);
4568
4569 return sret;
4570 }
4571
4572 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4573 unsigned int idx)
4574 {
4575 __free_page(spd->pages[idx]);
4576 }
4577
4578 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4579 .can_merge = 0,
4580 .confirm = generic_pipe_buf_confirm,
4581 .release = generic_pipe_buf_release,
4582 .steal = generic_pipe_buf_steal,
4583 .get = generic_pipe_buf_get,
4584 };
4585
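/*
 * Fill iter->seq with formatted trace entries until the page-sized seq
 * buffer is full or @rem bytes have been produced; returns how many of
 * the requested bytes are still left to fill.
 */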
4586 static size_t
4587 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4588 {
4589 size_t count;
4590 int save_len;
4591 int ret;
4592
4593 /* Seq buffer is page-sized, exactly what we need. */
4594 for (;;) {
4595 save_len = iter->seq.seq.len;
4596 ret = print_trace_line(iter);
4597
4598 if (trace_seq_has_overflowed(&iter->seq)) {
4599 iter->seq.seq.len = save_len;
4600 break;
4601 }
4602
4603 /*
4604 * This should not be hit, because it should only
4605 * be set if the iter->seq overflowed. But check it
4606 * anyway to be safe.
4607 */
4608 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4609 iter->seq.seq.len = save_len;
4610 break;
4611 }
4612
4613 count = trace_seq_used(&iter->seq) - save_len;
4614 if (rem < count) {
4615 rem = 0;
4616 iter->seq.seq.len = save_len;
4617 break;
4618 }
4619
4620 if (ret != TRACE_TYPE_NO_CONSUME)
4621 trace_consume(iter);
4622 rem -= count;
4623 if (!trace_find_next_entry_inc(iter)) {
4624 rem = 0;
4625 iter->ent = NULL;
4626 break;
4627 }
4628 }
4629
4630 return rem;
4631 }
4632
4633 static ssize_t tracing_splice_read_pipe(struct file *filp,
4634 loff_t *ppos,
4635 struct pipe_inode_info *pipe,
4636 size_t len,
4637 unsigned int flags)
4638 {
4639 struct page *pages_def[PIPE_DEF_BUFFERS];
4640 struct partial_page partial_def[PIPE_DEF_BUFFERS];
4641 struct trace_iterator *iter = filp->private_data;
4642 struct splice_pipe_desc spd = {
4643 .pages = pages_def,
4644 .partial = partial_def,
4645 .nr_pages = 0, /* This gets updated below. */
4646 .nr_pages_max = PIPE_DEF_BUFFERS,
4647 .flags = flags,
4648 .ops = &tracing_pipe_buf_ops,
4649 .spd_release = tracing_spd_release_pipe,
4650 };
4651 ssize_t ret;
4652 size_t rem;
4653 unsigned int i;
4654
4655 if (splice_grow_spd(pipe, &spd))
4656 return -ENOMEM;
4657
4658 mutex_lock(&iter->mutex);
4659
4660 if (iter->trace->splice_read) {
4661 ret = iter->trace->splice_read(iter, filp,
4662 ppos, pipe, len, flags);
4663 if (ret)
4664 goto out_err;
4665 }
4666
4667 ret = tracing_wait_pipe(filp);
4668 if (ret <= 0)
4669 goto out_err;
4670
4671 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4672 ret = -EFAULT;
4673 goto out_err;
4674 }
4675
4676 trace_event_read_lock();
4677 trace_access_lock(iter->cpu_file);
4678
4679 /* Fill as many pages as possible. */
4680 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4681 spd.pages[i] = alloc_page(GFP_KERNEL);
4682 if (!spd.pages[i])
4683 break;
4684
4685 rem = tracing_fill_pipe_page(rem, iter);
4686
4687 /* Copy the data into the page, so we can start over. */
4688 ret = trace_seq_to_buffer(&iter->seq,
4689 page_address(spd.pages[i]),
4690 trace_seq_used(&iter->seq));
4691 if (ret < 0) {
4692 __free_page(spd.pages[i]);
4693 break;
4694 }
4695 spd.partial[i].offset = 0;
4696 spd.partial[i].len = trace_seq_used(&iter->seq);
4697
4698 trace_seq_init(&iter->seq);
4699 }
4700
4701 trace_access_unlock(iter->cpu_file);
4702 trace_event_read_unlock();
4703 mutex_unlock(&iter->mutex);
4704
4705 spd.nr_pages = i;
4706
4707 ret = splice_to_pipe(pipe, &spd);
4708 out:
4709 splice_shrink_spd(&spd);
4710 return ret;
4711
4712 out_err:
4713 mutex_unlock(&iter->mutex);
4714 goto out;
4715 }
4716
4717 static ssize_t
4718 tracing_entries_read(struct file *filp, char __user *ubuf,
4719 size_t cnt, loff_t *ppos)
4720 {
4721 struct inode *inode = file_inode(filp);
4722 struct trace_array *tr = inode->i_private;
4723 int cpu = tracing_get_cpu(inode);
4724 char buf[64];
4725 int r = 0;
4726 ssize_t ret;
4727
4728 mutex_lock(&trace_types_lock);
4729
4730 if (cpu == RING_BUFFER_ALL_CPUS) {
4731 int cpu, buf_size_same;
4732 unsigned long size;
4733
4734 size = 0;
4735 buf_size_same = 1;
4736 /* check if all cpu sizes are the same */
4737 for_each_tracing_cpu(cpu) {
4738 /* fill in the size from first enabled cpu */
4739 if (size == 0)
4740 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4741 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4742 buf_size_same = 0;
4743 break;
4744 }
4745 }
4746
4747 if (buf_size_same) {
4748 if (!ring_buffer_expanded)
4749 r = sprintf(buf, "%lu (expanded: %lu)\n",
4750 size >> 10,
4751 trace_buf_size >> 10);
4752 else
4753 r = sprintf(buf, "%lu\n", size >> 10);
4754 } else
4755 r = sprintf(buf, "X\n");
4756 } else
4757 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4758
4759 mutex_unlock(&trace_types_lock);
4760
4761 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4762 return ret;
4763 }
4764
4765 static ssize_t
4766 tracing_entries_write(struct file *filp, const char __user *ubuf,
4767 size_t cnt, loff_t *ppos)
4768 {
4769 struct inode *inode = file_inode(filp);
4770 struct trace_array *tr = inode->i_private;
4771 unsigned long val;
4772 int ret;
4773
4774 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4775 if (ret)
4776 return ret;
4777
4778 /* must have at least 1 entry */
4779 if (!val)
4780 return -EINVAL;
4781
4782 /* value is in KB */
4783 val <<= 10;
4784 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4785 if (ret < 0)
4786 return ret;
4787
4788 *ppos += cnt;
4789
4790 return cnt;
4791 }
4792
4793 static ssize_t
4794 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4795 size_t cnt, loff_t *ppos)
4796 {
4797 struct trace_array *tr = filp->private_data;
4798 char buf[64];
4799 int r, cpu;
4800 unsigned long size = 0, expanded_size = 0;
4801
4802 mutex_lock(&trace_types_lock);
4803 for_each_tracing_cpu(cpu) {
4804 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4805 if (!ring_buffer_expanded)
4806 expanded_size += trace_buf_size >> 10;
4807 }
4808 if (ring_buffer_expanded)
4809 r = sprintf(buf, "%lu\n", size);
4810 else
4811 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4812 mutex_unlock(&trace_types_lock);
4813
4814 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4815 }
4816
4817 static ssize_t
4818 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4819 size_t cnt, loff_t *ppos)
4820 {
4821 /*
4822 * There is no need to read what the user has written; this function
4823 * just makes sure that there is no error when "echo" is used.
4824 */
4825
4826 *ppos += cnt;
4827
4828 return cnt;
4829 }
4830
4831 static int
4832 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4833 {
4834 struct trace_array *tr = inode->i_private;
4835
4836 /* disable tracing ? */
4837 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4838 tracer_tracing_off(tr);
4839 /* resize the ring buffer to 0 */
4840 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4841
4842 trace_array_put(tr);
4843
4844 return 0;
4845 }
4846
4847 static ssize_t
4848 tracing_mark_write(struct file *filp, const char __user *ubuf,
4849 size_t cnt, loff_t *fpos)
4850 {
4851 unsigned long addr = (unsigned long)ubuf;
4852 struct trace_array *tr = filp->private_data;
4853 struct ring_buffer_event *event;
4854 struct ring_buffer *buffer;
4855 struct print_entry *entry;
4856 unsigned long irq_flags;
4857 struct page *pages[2];
4858 void *map_page[2];
4859 int nr_pages = 1;
4860 ssize_t written;
4861 int offset;
4862 int size;
4863 int len;
4864 int ret;
4865 int i;
4866
4867 if (tracing_disabled)
4868 return -EINVAL;
4869
4870 if (!(trace_flags & TRACE_ITER_MARKERS))
4871 return -EINVAL;
4872
4873 if (cnt > TRACE_BUF_SIZE)
4874 cnt = TRACE_BUF_SIZE;
4875
4876 /*
4877 * Userspace is injecting traces into the kernel trace buffer.
4878 * We want to be as non-intrusive as possible.
4879 * To do so, we do not want to allocate any special buffers
4880 * or take any locks, but instead write the userspace data
4881 * straight into the ring buffer.
4882 *
4883 * First we need to pin the userspace buffer into memory.
4884 * It most likely already is, because the caller just referenced it,
4885 * but there's no guarantee. By using get_user_pages_fast()
4886 * and kmap_atomic/kunmap_atomic() we can get access to the
4887 * pages directly. We then write the data directly into the
4888 * ring buffer.
4889 */
4890 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4891
4892 /* check if we cross pages */
4893 if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4894 nr_pages = 2;
4895
4896 offset = addr & (PAGE_SIZE - 1);
4897 addr &= PAGE_MASK;
4898
4899 ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4900 if (ret < nr_pages) {
4901 while (--ret >= 0)
4902 put_page(pages[ret]);
4903 written = -EFAULT;
4904 goto out;
4905 }
4906
4907 for (i = 0; i < nr_pages; i++)
4908 map_page[i] = kmap_atomic(pages[i]);
4909
4910 local_save_flags(irq_flags);
4911 size = sizeof(*entry) + cnt + 2; /* possible \n added */
4912 buffer = tr->trace_buffer.buffer;
4913 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4914 irq_flags, preempt_count());
4915 if (!event) {
4916 /* Ring buffer disabled, return as if not open for write */
4917 written = -EBADF;
4918 goto out_unlock;
4919 }
4920
4921 entry = ring_buffer_event_data(event);
4922 entry->ip = _THIS_IP_;
4923
4924 if (nr_pages == 2) {
4925 len = PAGE_SIZE - offset;
4926 memcpy(&entry->buf, map_page[0] + offset, len);
4927 memcpy(&entry->buf[len], map_page[1], cnt - len);
4928 } else
4929 memcpy(&entry->buf, map_page[0] + offset, cnt);
4930
4931 if (entry->buf[cnt - 1] != '\n') {
4932 entry->buf[cnt] = '\n';
4933 entry->buf[cnt + 1] = '\0';
4934 } else
4935 entry->buf[cnt] = '\0';
4936
4937 __buffer_unlock_commit(buffer, event);
4938
4939 written = cnt;
4940
4941 *fpos += written;
4942
4943 out_unlock:
4944 for (i = nr_pages - 1; i >= 0; i--) {
4945 kunmap_atomic(map_page[i]);
4946 put_page(pages[i]);
4947 }
4948 out:
4949 return written;
4950 }
4951
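/*
 * Illustrative userspace sketch (not part of this file), assuming tracefs
 * is mounted at /sys/kernel/debug/tracing: a plain write() to the
 * trace_marker file ends up in tracing_mark_write() above:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "hello from userspace\n", 21);
 *		close(fd);
 *	}
 */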
4952 static int tracing_clock_show(struct seq_file *m, void *v)
4953 {
4954 struct trace_array *tr = m->private;
4955 int i;
4956
4957 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4958 seq_printf(m,
4959 "%s%s%s%s", i ? " " : "",
4960 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4961 i == tr->clock_id ? "]" : "");
4962 seq_putc(m, '\n');
4963
4964 return 0;
4965 }
4966
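/*
 * Switch @tr to the trace clock named @clockstr and reset the buffers,
 * since timestamps taken with different clocks are not comparable.
 */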
4967 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4968 {
4969 int i;
4970
4971 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4972 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4973 break;
4974 }
4975 if (i == ARRAY_SIZE(trace_clocks))
4976 return -EINVAL;
4977
4978 mutex_lock(&trace_types_lock);
4979
4980 tr->clock_id = i;
4981
4982 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4983
4984 /*
4985 * New clock may not be consistent with the previous clock.
4986 * Reset the buffer so that it doesn't have incomparable timestamps.
4987 */
4988 tracing_reset_online_cpus(&tr->trace_buffer);
4989
4990 #ifdef CONFIG_TRACER_MAX_TRACE
4991 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4992 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4993 tracing_reset_online_cpus(&tr->max_buffer);
4994 #endif
4995
4996 mutex_unlock(&trace_types_lock);
4997
4998 return 0;
4999 }
5000
5001 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5002 size_t cnt, loff_t *fpos)
5003 {
5004 struct seq_file *m = filp->private_data;
5005 struct trace_array *tr = m->private;
5006 char buf[64];
5007 const char *clockstr;
5008 int ret;
5009
5010 if (cnt >= sizeof(buf))
5011 return -EINVAL;
5012
5013 if (copy_from_user(&buf, ubuf, cnt))
5014 return -EFAULT;
5015
5016 buf[cnt] = 0;
5017
5018 clockstr = strstrip(buf);
5019
5020 ret = tracing_set_clock(tr, clockstr);
5021 if (ret)
5022 return ret;
5023
5024 *fpos += cnt;
5025
5026 return cnt;
5027 }
5028
5029 static int tracing_clock_open(struct inode *inode, struct file *file)
5030 {
5031 struct trace_array *tr = inode->i_private;
5032 int ret;
5033
5034 if (tracing_disabled)
5035 return -ENODEV;
5036
5037 if (trace_array_get(tr))
5038 return -ENODEV;
5039
5040 ret = single_open(file, tracing_clock_show, inode->i_private);
5041 if (ret < 0)
5042 trace_array_put(tr);
5043
5044 return ret;
5045 }
5046
5047 struct ftrace_buffer_info {
5048 struct trace_iterator iter;
5049 void *spare;
5050 unsigned int read;
5051 };
5052
5053 #ifdef CONFIG_TRACER_SNAPSHOT
5054 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5055 {
5056 struct trace_array *tr = inode->i_private;
5057 struct trace_iterator *iter;
5058 struct seq_file *m;
5059 int ret = 0;
5060
5061 if (trace_array_get(tr) < 0)
5062 return -ENODEV;
5063
5064 if (file->f_mode & FMODE_READ) {
5065 iter = __tracing_open(inode, file, true);
5066 if (IS_ERR(iter))
5067 ret = PTR_ERR(iter);
5068 } else {
5069 /* Writes still need the seq_file to hold the private data */
5070 ret = -ENOMEM;
5071 m = kzalloc(sizeof(*m), GFP_KERNEL);
5072 if (!m)
5073 goto out;
5074 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5075 if (!iter) {
5076 kfree(m);
5077 goto out;
5078 }
5079 ret = 0;
5080
5081 iter->tr = tr;
5082 iter->trace_buffer = &tr->max_buffer;
5083 iter->cpu_file = tracing_get_cpu(inode);
5084 m->private = iter;
5085 file->private_data = m;
5086 }
5087 out:
5088 if (ret < 0)
5089 trace_array_put(tr);
5090
5091 return ret;
5092 }
5093
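/*
 * Writing to the "snapshot" file:
 *   0 - free the snapshot buffer (whole buffer only, not per cpu)
 *   1 - allocate the snapshot buffer if needed and swap it with the live
 *       buffer (per-cpu swap only with CONFIG_RING_BUFFER_ALLOW_SWAP)
 *   else - clear out the snapshot buffer contents
 */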
5094 static ssize_t
5095 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5096 loff_t *ppos)
5097 {
5098 struct seq_file *m = filp->private_data;
5099 struct trace_iterator *iter = m->private;
5100 struct trace_array *tr = iter->tr;
5101 unsigned long val;
5102 int ret;
5103
5104 ret = tracing_update_buffers();
5105 if (ret < 0)
5106 return ret;
5107
5108 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5109 if (ret)
5110 return ret;
5111
5112 mutex_lock(&trace_types_lock);
5113
5114 if (tr->current_trace->use_max_tr) {
5115 ret = -EBUSY;
5116 goto out;
5117 }
5118
5119 switch (val) {
5120 case 0:
5121 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5122 ret = -EINVAL;
5123 break;
5124 }
5125 if (tr->allocated_snapshot)
5126 free_snapshot(tr);
5127 break;
5128 case 1:
5129 /* Only allow per-cpu swap if the ring buffer supports it */
5130 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5131 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5132 ret = -EINVAL;
5133 break;
5134 }
5135 #endif
5136 if (!tr->allocated_snapshot) {
5137 ret = alloc_snapshot(tr);
5138 if (ret < 0)
5139 break;
5140 }
5141 local_irq_disable();
5142 /* Now, we're going to swap */
5143 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5144 update_max_tr(tr, current, smp_processor_id());
5145 else
5146 update_max_tr_single(tr, current, iter->cpu_file);
5147 local_irq_enable();
5148 break;
5149 default:
5150 if (tr->allocated_snapshot) {
5151 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5152 tracing_reset_online_cpus(&tr->max_buffer);
5153 else
5154 tracing_reset(&tr->max_buffer, iter->cpu_file);
5155 }
5156 break;
5157 }
5158
5159 if (ret >= 0) {
5160 *ppos += cnt;
5161 ret = cnt;
5162 }
5163 out:
5164 mutex_unlock(&trace_types_lock);
5165 return ret;
5166 }
5167
5168 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5169 {
5170 struct seq_file *m = file->private_data;
5171 int ret;
5172
5173 ret = tracing_release(inode, file);
5174
5175 if (file->f_mode & FMODE_READ)
5176 return ret;
5177
5178 /* If write only, the seq_file is just a stub */
5179 if (m)
5180 kfree(m->private);
5181 kfree(m);
5182
5183 return 0;
5184 }
5185
5186 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5187 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5188 size_t count, loff_t *ppos);
5189 static int tracing_buffers_release(struct inode *inode, struct file *file);
5190 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5191 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5192
5193 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5194 {
5195 struct ftrace_buffer_info *info;
5196 int ret;
5197
5198 ret = tracing_buffers_open(inode, filp);
5199 if (ret < 0)
5200 return ret;
5201
5202 info = filp->private_data;
5203
5204 if (info->iter.trace->use_max_tr) {
5205 tracing_buffers_release(inode, filp);
5206 return -EBUSY;
5207 }
5208
5209 info->iter.snapshot = true;
5210 info->iter.trace_buffer = &info->iter.tr->max_buffer;
5211
5212 return ret;
5213 }
5214
5215 #endif /* CONFIG_TRACER_SNAPSHOT */
5216
5217
5218 static const struct file_operations tracing_thresh_fops = {
5219 .open = tracing_open_generic,
5220 .read = tracing_thresh_read,
5221 .write = tracing_thresh_write,
5222 .llseek = generic_file_llseek,
5223 };
5224
5225 static const struct file_operations tracing_max_lat_fops = {
5226 .open = tracing_open_generic,
5227 .read = tracing_max_lat_read,
5228 .write = tracing_max_lat_write,
5229 .llseek = generic_file_llseek,
5230 };
5231
5232 static const struct file_operations set_tracer_fops = {
5233 .open = tracing_open_generic,
5234 .read = tracing_set_trace_read,
5235 .write = tracing_set_trace_write,
5236 .llseek = generic_file_llseek,
5237 };
5238
5239 static const struct file_operations tracing_pipe_fops = {
5240 .open = tracing_open_pipe,
5241 .poll = tracing_poll_pipe,
5242 .read = tracing_read_pipe,
5243 .splice_read = tracing_splice_read_pipe,
5244 .release = tracing_release_pipe,
5245 .llseek = no_llseek,
5246 };
5247
5248 static const struct file_operations tracing_entries_fops = {
5249 .open = tracing_open_generic_tr,
5250 .read = tracing_entries_read,
5251 .write = tracing_entries_write,
5252 .llseek = generic_file_llseek,
5253 .release = tracing_release_generic_tr,
5254 };
5255
5256 static const struct file_operations tracing_total_entries_fops = {
5257 .open = tracing_open_generic_tr,
5258 .read = tracing_total_entries_read,
5259 .llseek = generic_file_llseek,
5260 .release = tracing_release_generic_tr,
5261 };
5262
5263 static const struct file_operations tracing_free_buffer_fops = {
5264 .open = tracing_open_generic_tr,
5265 .write = tracing_free_buffer_write,
5266 .release = tracing_free_buffer_release,
5267 };
5268
5269 static const struct file_operations tracing_mark_fops = {
5270 .open = tracing_open_generic_tr,
5271 .write = tracing_mark_write,
5272 .llseek = generic_file_llseek,
5273 .release = tracing_release_generic_tr,
5274 };
5275
5276 static const struct file_operations trace_clock_fops = {
5277 .open = tracing_clock_open,
5278 .read = seq_read,
5279 .llseek = seq_lseek,
5280 .release = tracing_single_release_tr,
5281 .write = tracing_clock_write,
5282 };
5283
5284 #ifdef CONFIG_TRACER_SNAPSHOT
5285 static const struct file_operations snapshot_fops = {
5286 .open = tracing_snapshot_open,
5287 .read = seq_read,
5288 .write = tracing_snapshot_write,
5289 .llseek = tracing_lseek,
5290 .release = tracing_snapshot_release,
5291 };
5292
5293 static const struct file_operations snapshot_raw_fops = {
5294 .open = snapshot_raw_open,
5295 .read = tracing_buffers_read,
5296 .release = tracing_buffers_release,
5297 .splice_read = tracing_buffers_splice_read,
5298 .llseek = no_llseek,
5299 };
5300
5301 #endif /* CONFIG_TRACER_SNAPSHOT */
5302
5303 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5304 {
5305 struct trace_array *tr = inode->i_private;
5306 struct ftrace_buffer_info *info;
5307 int ret;
5308
5309 if (tracing_disabled)
5310 return -ENODEV;
5311
5312 if (trace_array_get(tr) < 0)
5313 return -ENODEV;
5314
5315 info = kzalloc(sizeof(*info), GFP_KERNEL);
5316 if (!info) {
5317 trace_array_put(tr);
5318 return -ENOMEM;
5319 }
5320
5321 mutex_lock(&trace_types_lock);
5322
5323 info->iter.tr = tr;
5324 info->iter.cpu_file = tracing_get_cpu(inode);
5325 info->iter.trace = tr->current_trace;
5326 info->iter.trace_buffer = &tr->trace_buffer;
5327 info->spare = NULL;
5328 /* Force reading ring buffer for first read */
5329 info->read = (unsigned int)-1;
5330
5331 filp->private_data = info;
5332
5333 tr->current_trace->ref++;
5334
5335 mutex_unlock(&trace_types_lock);
5336
5337 ret = nonseekable_open(inode, filp);
5338 if (ret < 0)
5339 trace_array_put(tr);
5340
5341 return ret;
5342 }
5343
5344 static unsigned int
5345 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5346 {
5347 struct ftrace_buffer_info *info = filp->private_data;
5348 struct trace_iterator *iter = &info->iter;
5349
5350 return trace_poll(iter, filp, poll_table);
5351 }
5352
5353 static ssize_t
5354 tracing_buffers_read(struct file *filp, char __user *ubuf,
5355 size_t count, loff_t *ppos)
5356 {
5357 struct ftrace_buffer_info *info = filp->private_data;
5358 struct trace_iterator *iter = &info->iter;
5359 ssize_t ret;
5360 ssize_t size;
5361
5362 if (!count)
5363 return 0;
5364
5365 #ifdef CONFIG_TRACER_MAX_TRACE
5366 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5367 return -EBUSY;
5368 #endif
5369
5370 if (!info->spare)
5371 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5372 iter->cpu_file);
5373 if (!info->spare)
5374 return -ENOMEM;
5375
5376 /* Do we have previous read data to read? */
5377 if (info->read < PAGE_SIZE)
5378 goto read;
5379
5380 again:
5381 trace_access_lock(iter->cpu_file);
5382 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5383 &info->spare,
5384 count,
5385 iter->cpu_file, 0);
5386 trace_access_unlock(iter->cpu_file);
5387
5388 if (ret < 0) {
5389 if (trace_empty(iter)) {
5390 if ((filp->f_flags & O_NONBLOCK))
5391 return -EAGAIN;
5392
5393 ret = wait_on_pipe(iter, false);
5394 if (ret)
5395 return ret;
5396
5397 goto again;
5398 }
5399 return 0;
5400 }
5401
5402 info->read = 0;
5403 read:
5404 size = PAGE_SIZE - info->read;
5405 if (size > count)
5406 size = count;
5407
5408 ret = copy_to_user(ubuf, info->spare + info->read, size);
5409 if (ret == size)
5410 return -EFAULT;
5411
5412 size -= ret;
5413
5414 *ppos += size;
5415 info->read += size;
5416
5417 return size;
5418 }
5419
5420 static int tracing_buffers_release(struct inode *inode, struct file *file)
5421 {
5422 struct ftrace_buffer_info *info = file->private_data;
5423 struct trace_iterator *iter = &info->iter;
5424
5425 mutex_lock(&trace_types_lock);
5426
5427 iter->tr->current_trace->ref--;
5428
5429 __trace_array_put(iter->tr);
5430
5431 if (info->spare)
5432 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5433 kfree(info);
5434
5435 mutex_unlock(&trace_types_lock);
5436
5437 return 0;
5438 }
5439
5440 struct buffer_ref {
5441 struct ring_buffer *buffer;
5442 void *page;
5443 int ref;
5444 };
5445
5446 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5447 struct pipe_buffer *buf)
5448 {
5449 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5450
5451 if (--ref->ref)
5452 return;
5453
5454 ring_buffer_free_read_page(ref->buffer, ref->page);
5455 kfree(ref);
5456 buf->private = 0;
5457 }
5458
5459 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5460 struct pipe_buffer *buf)
5461 {
5462 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5463
5464 ref->ref++;
5465 }
5466
5467 /* Pipe buffer operations for a buffer. */
5468 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5469 .can_merge = 0,
5470 .confirm = generic_pipe_buf_confirm,
5471 .release = buffer_pipe_buf_release,
5472 .steal = generic_pipe_buf_steal,
5473 .get = buffer_pipe_buf_get,
5474 };
5475
5476 /*
5477 * Callback from splice_to_pipe(): release the pages left in the spd
5478 * in case we errored out while filling the pipe.
5479 */
5480 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5481 {
5482 struct buffer_ref *ref =
5483 (struct buffer_ref *)spd->partial[i].private;
5484
5485 if (--ref->ref)
5486 return;
5487
5488 ring_buffer_free_read_page(ref->buffer, ref->page);
5489 kfree(ref);
5490 spd->partial[i].private = 0;
5491 }
5492
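/*
 * Splice whole ring-buffer pages of binary trace data from the per-cpu
 * buffer into @pipe without copying individual events (this backs the
 * trace_pipe_raw file).
 */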
5493 static ssize_t
5494 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5495 struct pipe_inode_info *pipe, size_t len,
5496 unsigned int flags)
5497 {
5498 struct ftrace_buffer_info *info = file->private_data;
5499 struct trace_iterator *iter = &info->iter;
5500 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5501 struct page *pages_def[PIPE_DEF_BUFFERS];
5502 struct splice_pipe_desc spd = {
5503 .pages = pages_def,
5504 .partial = partial_def,
5505 .nr_pages_max = PIPE_DEF_BUFFERS,
5506 .flags = flags,
5507 .ops = &buffer_pipe_buf_ops,
5508 .spd_release = buffer_spd_release,
5509 };
5510 struct buffer_ref *ref;
5511 int entries, size, i;
5512 ssize_t ret = 0;
5513
5514 #ifdef CONFIG_TRACER_MAX_TRACE
5515 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5516 return -EBUSY;
5517 #endif
5518
5519 if (splice_grow_spd(pipe, &spd))
5520 return -ENOMEM;
5521
5522 if (*ppos & (PAGE_SIZE - 1))
5523 return -EINVAL;
5524
5525 if (len & (PAGE_SIZE - 1)) {
5526 if (len < PAGE_SIZE)
5527 return -EINVAL;
5528 len &= PAGE_MASK;
5529 }
5530
5531 again:
5532 trace_access_lock(iter->cpu_file);
5533 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5534
5535 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5536 struct page *page;
5537 int r;
5538
5539 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5540 if (!ref) {
5541 ret = -ENOMEM;
5542 break;
5543 }
5544
5545 ref->ref = 1;
5546 ref->buffer = iter->trace_buffer->buffer;
5547 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5548 if (!ref->page) {
5549 ret = -ENOMEM;
5550 kfree(ref);
5551 break;
5552 }
5553
5554 r = ring_buffer_read_page(ref->buffer, &ref->page,
5555 len, iter->cpu_file, 1);
5556 if (r < 0) {
5557 ring_buffer_free_read_page(ref->buffer, ref->page);
5558 kfree(ref);
5559 break;
5560 }
5561
5562 /*
5563 * Zero out any leftover data; this is going to
5564 * user land.
5565 */
5566 size = ring_buffer_page_len(ref->page);
5567 if (size < PAGE_SIZE)
5568 memset(ref->page + size, 0, PAGE_SIZE - size);
5569
5570 page = virt_to_page(ref->page);
5571
5572 spd.pages[i] = page;
5573 spd.partial[i].len = PAGE_SIZE;
5574 spd.partial[i].offset = 0;
5575 spd.partial[i].private = (unsigned long)ref;
5576 spd.nr_pages++;
5577 *ppos += PAGE_SIZE;
5578
5579 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5580 }
5581
5582 trace_access_unlock(iter->cpu_file);
5583 spd.nr_pages = i;
5584
5585 /* did we read anything? */
5586 if (!spd.nr_pages) {
5587 if (ret)
5588 return ret;
5589
5590 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5591 return -EAGAIN;
5592
5593 ret = wait_on_pipe(iter, true);
5594 if (ret)
5595 return ret;
5596
5597 goto again;
5598 }
5599
5600 ret = splice_to_pipe(pipe, &spd);
5601 splice_shrink_spd(&spd);
5602
5603 return ret;
5604 }
5605
5606 static const struct file_operations tracing_buffers_fops = {
5607 .open = tracing_buffers_open,
5608 .read = tracing_buffers_read,
5609 .poll = tracing_buffers_poll,
5610 .release = tracing_buffers_release,
5611 .splice_read = tracing_buffers_splice_read,
5612 .llseek = no_llseek,
5613 };
5614
5615 static ssize_t
5616 tracing_stats_read(struct file *filp, char __user *ubuf,
5617 size_t count, loff_t *ppos)
5618 {
5619 struct inode *inode = file_inode(filp);
5620 struct trace_array *tr = inode->i_private;
5621 struct trace_buffer *trace_buf = &tr->trace_buffer;
5622 int cpu = tracing_get_cpu(inode);
5623 struct trace_seq *s;
5624 unsigned long cnt;
5625 unsigned long long t;
5626 unsigned long usec_rem;
5627
5628 s = kmalloc(sizeof(*s), GFP_KERNEL);
5629 if (!s)
5630 return -ENOMEM;
5631
5632 trace_seq_init(s);
5633
5634 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5635 trace_seq_printf(s, "entries: %ld\n", cnt);
5636
5637 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5638 trace_seq_printf(s, "overrun: %ld\n", cnt);
5639
5640 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5641 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5642
5643 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5644 trace_seq_printf(s, "bytes: %ld\n", cnt);
5645
5646 if (trace_clocks[tr->clock_id].in_ns) {
5647 /* local or global for trace_clock */
5648 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5649 usec_rem = do_div(t, USEC_PER_SEC);
5650 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5651 t, usec_rem);
5652
5653 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5654 usec_rem = do_div(t, USEC_PER_SEC);
5655 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5656 } else {
5657 /* counter or tsc mode for trace_clock */
5658 trace_seq_printf(s, "oldest event ts: %llu\n",
5659 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5660
5661 trace_seq_printf(s, "now ts: %llu\n",
5662 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5663 }
5664
5665 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5666 trace_seq_printf(s, "dropped events: %ld\n", cnt);
5667
5668 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5669 trace_seq_printf(s, "read events: %ld\n", cnt);
5670
5671 count = simple_read_from_buffer(ubuf, count, ppos,
5672 s->buffer, trace_seq_used(s));
5673
5674 kfree(s);
5675
5676 return count;
5677 }
5678
5679 static const struct file_operations tracing_stats_fops = {
5680 .open = tracing_open_generic_tr,
5681 .read = tracing_stats_read,
5682 .llseek = generic_file_llseek,
5683 .release = tracing_release_generic_tr,
5684 };
5685
5686 #ifdef CONFIG_DYNAMIC_FTRACE
5687
5688 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5689 {
5690 return 0;
5691 }
5692
5693 static ssize_t
5694 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5695 size_t cnt, loff_t *ppos)
5696 {
5697 static char ftrace_dyn_info_buffer[1024];
5698 static DEFINE_MUTEX(dyn_info_mutex);
5699 unsigned long *p = filp->private_data;
5700 char *buf = ftrace_dyn_info_buffer;
5701 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5702 int r;
5703
5704 mutex_lock(&dyn_info_mutex);
5705 r = sprintf(buf, "%ld ", *p);
5706
5707 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5708 buf[r++] = '\n';
5709
5710 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5711
5712 mutex_unlock(&dyn_info_mutex);
5713
5714 return r;
5715 }
5716
5717 static const struct file_operations tracing_dyn_info_fops = {
5718 .open = tracing_open_generic,
5719 .read = tracing_read_dyn_info,
5720 .llseek = generic_file_llseek,
5721 };
5722 #endif /* CONFIG_DYNAMIC_FTRACE */
5723
5724 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5725 static void
5726 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5727 {
5728 tracing_snapshot();
5729 }
5730
5731 static void
5732 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5733 {
5734 unsigned long *count = (long *)data;
5735
5736 if (!*count)
5737 return;
5738
5739 if (*count != -1)
5740 (*count)--;
5741
5742 tracing_snapshot();
5743 }
5744
5745 static int
5746 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5747 struct ftrace_probe_ops *ops, void *data)
5748 {
5749 long count = (long)data;
5750
5751 seq_printf(m, "%ps:", (void *)ip);
5752
5753 seq_puts(m, "snapshot");
5754
5755 if (count == -1)
5756 seq_puts(m, ":unlimited\n");
5757 else
5758 seq_printf(m, ":count=%ld\n", count);
5759
5760 return 0;
5761 }
5762
5763 static struct ftrace_probe_ops snapshot_probe_ops = {
5764 .func = ftrace_snapshot,
5765 .print = ftrace_snapshot_print,
5766 };
5767
5768 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5769 .func = ftrace_count_snapshot,
5770 .print = ftrace_snapshot_print,
5771 };
5772
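/*
 * Handle the "snapshot" command of set_ftrace_filter:
 *   echo '<func>:snapshot[:count]' > set_ftrace_filter
 * registers a probe that takes a snapshot each time <func> is hit (at
 * most count times when a count is given); a leading '!' unregisters it.
 */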
5773 static int
5774 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5775 char *glob, char *cmd, char *param, int enable)
5776 {
5777 struct ftrace_probe_ops *ops;
5778 void *count = (void *)-1;
5779 char *number;
5780 int ret;
5781
5782 /* hash funcs only work with set_ftrace_filter */
5783 if (!enable)
5784 return -EINVAL;
5785
5786 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5787
5788 if (glob[0] == '!') {
5789 unregister_ftrace_function_probe_func(glob+1, ops);
5790 return 0;
5791 }
5792
5793 if (!param)
5794 goto out_reg;
5795
5796 number = strsep(&param, ":");
5797
5798 if (!strlen(number))
5799 goto out_reg;
5800
5801 /*
5802 * We use the callback data field (which is a pointer)
5803 * as our counter.
5804 */
5805 ret = kstrtoul(number, 0, (unsigned long *)&count);
5806 if (ret)
5807 return ret;
5808
5809 out_reg:
5810 ret = register_ftrace_function_probe(glob, ops, count);
5811
5812 if (ret >= 0)
5813 alloc_snapshot(&global_trace);
5814
5815 return ret < 0 ? ret : 0;
5816 }
5817
5818 static struct ftrace_func_command ftrace_snapshot_cmd = {
5819 .name = "snapshot",
5820 .func = ftrace_trace_snapshot_callback,
5821 };
5822
5823 static __init int register_snapshot_cmd(void)
5824 {
5825 return register_ftrace_command(&ftrace_snapshot_cmd);
5826 }
5827 #else
5828 static inline __init int register_snapshot_cmd(void) { return 0; }
5829 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5830
5831 static struct dentry *tracing_get_dentry(struct trace_array *tr)
5832 {
5833 if (WARN_ON(!tr->dir))
5834 return ERR_PTR(-ENODEV);
5835
5836 /* Top directory uses NULL as the parent */
5837 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5838 return NULL;
5839
5840 /* All sub buffers have a descriptor */
5841 return tr->dir;
5842 }
5843
5844 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5845 {
5846 struct dentry *d_tracer;
5847
5848 if (tr->percpu_dir)
5849 return tr->percpu_dir;
5850
5851 d_tracer = tracing_get_dentry(tr);
5852 if (IS_ERR(d_tracer))
5853 return NULL;
5854
5855 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
5856
5857 WARN_ONCE(!tr->percpu_dir,
5858 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
5859
5860 return tr->percpu_dir;
5861 }
5862
5863 static struct dentry *
5864 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5865 void *data, long cpu, const struct file_operations *fops)
5866 {
5867 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5868
5869 if (ret) /* See tracing_get_cpu() */
5870 ret->d_inode->i_cdev = (void *)(cpu + 1);
5871 return ret;
5872 }
5873
5874 static void
5875 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
5876 {
5877 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5878 struct dentry *d_cpu;
5879 char cpu_dir[30]; /* 30 characters should be more than enough */
5880
5881 if (!d_percpu)
5882 return;
5883
5884 snprintf(cpu_dir, 30, "cpu%ld", cpu);
5885 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
5886 if (!d_cpu) {
5887 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
5888 return;
5889 }
5890
5891 /* per cpu trace_pipe */
5892 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5893 tr, cpu, &tracing_pipe_fops);
5894
5895 /* per cpu trace */
5896 trace_create_cpu_file("trace", 0644, d_cpu,
5897 tr, cpu, &tracing_fops);
5898
5899 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5900 tr, cpu, &tracing_buffers_fops);
5901
5902 trace_create_cpu_file("stats", 0444, d_cpu,
5903 tr, cpu, &tracing_stats_fops);
5904
5905 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5906 tr, cpu, &tracing_entries_fops);
5907
5908 #ifdef CONFIG_TRACER_SNAPSHOT
5909 trace_create_cpu_file("snapshot", 0644, d_cpu,
5910 tr, cpu, &snapshot_fops);
5911
5912 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5913 tr, cpu, &snapshot_raw_fops);
5914 #endif
5915 }
5916
5917 #ifdef CONFIG_FTRACE_SELFTEST
5918 /* Let selftest have access to static functions in this file */
5919 #include "trace_selftest.c"
5920 #endif
5921
5922 struct trace_option_dentry {
5923 struct tracer_opt *opt;
5924 struct tracer_flags *flags;
5925 struct trace_array *tr;
5926 struct dentry *entry;
5927 };
5928
5929 static ssize_t
5930 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5931 loff_t *ppos)
5932 {
5933 struct trace_option_dentry *topt = filp->private_data;
5934 char *buf;
5935
5936 if (topt->flags->val & topt->opt->bit)
5937 buf = "1\n";
5938 else
5939 buf = "0\n";
5940
5941 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5942 }
5943
5944 static ssize_t
5945 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5946 loff_t *ppos)
5947 {
5948 struct trace_option_dentry *topt = filp->private_data;
5949 unsigned long val;
5950 int ret;
5951
5952 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5953 if (ret)
5954 return ret;
5955
5956 if (val != 0 && val != 1)
5957 return -EINVAL;
5958
5959 if (!!(topt->flags->val & topt->opt->bit) != val) {
5960 mutex_lock(&trace_types_lock);
5961 ret = __set_tracer_option(topt->tr, topt->flags,
5962 topt->opt, !val);
5963 mutex_unlock(&trace_types_lock);
5964 if (ret)
5965 return ret;
5966 }
5967
5968 *ppos += cnt;
5969
5970 return cnt;
5971 }
5972
5973
5974 static const struct file_operations trace_options_fops = {
5975 .open = tracing_open_generic,
5976 .read = trace_options_read,
5977 .write = trace_options_write,
5978 .llseek = generic_file_llseek,
5979 };
5980
5981 static ssize_t
5982 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5983 loff_t *ppos)
5984 {
5985 long index = (long)filp->private_data;
5986 char *buf;
5987
5988 if (trace_flags & (1 << index))
5989 buf = "1\n";
5990 else
5991 buf = "0\n";
5992
5993 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5994 }
5995
5996 static ssize_t
5997 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5998 loff_t *ppos)
5999 {
6000 struct trace_array *tr = &global_trace;
6001 long index = (long)filp->private_data;
6002 unsigned long val;
6003 int ret;
6004
6005 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6006 if (ret)
6007 return ret;
6008
6009 if (val != 0 && val != 1)
6010 return -EINVAL;
6011
6012 mutex_lock(&trace_types_lock);
6013 ret = set_tracer_flag(tr, 1 << index, val);
6014 mutex_unlock(&trace_types_lock);
6015
6016 if (ret < 0)
6017 return ret;
6018
6019 *ppos += cnt;
6020
6021 return cnt;
6022 }
6023
6024 static const struct file_operations trace_options_core_fops = {
6025 .open = tracing_open_generic,
6026 .read = trace_options_core_read,
6027 .write = trace_options_core_write,
6028 .llseek = generic_file_llseek,
6029 };
6030
6031 struct dentry *trace_create_file(const char *name,
6032 umode_t mode,
6033 struct dentry *parent,
6034 void *data,
6035 const struct file_operations *fops)
6036 {
6037 struct dentry *ret;
6038
6039 ret = tracefs_create_file(name, mode, parent, data, fops);
6040 if (!ret)
6041 pr_warning("Could not create tracefs '%s' entry\n", name);
6042
6043 return ret;
6044 }
6045
6046
6047 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6048 {
6049 struct dentry *d_tracer;
6050
6051 if (tr->options)
6052 return tr->options;
6053
6054 d_tracer = tracing_get_dentry(tr);
6055 if (IS_ERR(d_tracer))
6056 return NULL;
6057
6058 tr->options = tracefs_create_dir("options", d_tracer);
6059 if (!tr->options) {
6060 pr_warning("Could not create tracefs directory 'options'\n");
6061 return NULL;
6062 }
6063
6064 return tr->options;
6065 }
6066
6067 static void
6068 create_trace_option_file(struct trace_array *tr,
6069 struct trace_option_dentry *topt,
6070 struct tracer_flags *flags,
6071 struct tracer_opt *opt)
6072 {
6073 struct dentry *t_options;
6074
6075 t_options = trace_options_init_dentry(tr);
6076 if (!t_options)
6077 return;
6078
6079 topt->flags = flags;
6080 topt->opt = opt;
6081 topt->tr = tr;
6082
6083 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6084 &trace_options_fops);
6085
6086 }
6087
6088 static struct trace_option_dentry *
6089 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6090 {
6091 struct trace_option_dentry *topts;
6092 struct tracer_flags *flags;
6093 struct tracer_opt *opts;
6094 int cnt;
6095
6096 if (!tracer)
6097 return NULL;
6098
6099 flags = tracer->flags;
6100
6101 if (!flags || !flags->opts)
6102 return NULL;
6103
6104 opts = flags->opts;
6105
6106 for (cnt = 0; opts[cnt].name; cnt++)
6107 ;
6108
6109 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6110 if (!topts)
6111 return NULL;
6112
6113 for (cnt = 0; opts[cnt].name; cnt++)
6114 create_trace_option_file(tr, &topts[cnt], flags,
6115 &opts[cnt]);
6116
6117 return topts;
6118 }
6119
6120 static void
6121 destroy_trace_option_files(struct trace_option_dentry *topts)
6122 {
6123 int cnt;
6124
6125 if (!topts)
6126 return;
6127
6128 for (cnt = 0; topts[cnt].opt; cnt++)
6129 tracefs_remove(topts[cnt].entry);
6130
6131 kfree(topts);
6132 }
6133
6134 static struct dentry *
6135 create_trace_option_core_file(struct trace_array *tr,
6136 const char *option, long index)
6137 {
6138 struct dentry *t_options;
6139
6140 t_options = trace_options_init_dentry(tr);
6141 if (!t_options)
6142 return NULL;
6143
6144 return trace_create_file(option, 0644, t_options, (void *)index,
6145 &trace_options_core_fops);
6146 }
6147
6148 static __init void create_trace_options_dir(struct trace_array *tr)
6149 {
6150 struct dentry *t_options;
6151 int i;
6152
6153 t_options = trace_options_init_dentry(tr);
6154 if (!t_options)
6155 return;
6156
6157 for (i = 0; trace_options[i]; i++)
6158 create_trace_option_core_file(tr, trace_options[i], i);
6159 }
6160
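/*
 * The "tracing_on" file: reads report whether the ring buffer is
 * currently recording; writing 0 or 1 turns recording off or on and
 * calls the current tracer's stop()/start() callbacks.
 */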
6161 static ssize_t
6162 rb_simple_read(struct file *filp, char __user *ubuf,
6163 size_t cnt, loff_t *ppos)
6164 {
6165 struct trace_array *tr = filp->private_data;
6166 char buf[64];
6167 int r;
6168
6169 r = tracer_tracing_is_on(tr);
6170 r = sprintf(buf, "%d\n", r);
6171
6172 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6173 }
6174
6175 static ssize_t
6176 rb_simple_write(struct file *filp, const char __user *ubuf,
6177 size_t cnt, loff_t *ppos)
6178 {
6179 struct trace_array *tr = filp->private_data;
6180 struct ring_buffer *buffer = tr->trace_buffer.buffer;
6181 unsigned long val;
6182 int ret;
6183
6184 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6185 if (ret)
6186 return ret;
6187
6188 if (buffer) {
6189 mutex_lock(&trace_types_lock);
6190 if (val) {
6191 tracer_tracing_on(tr);
6192 if (tr->current_trace->start)
6193 tr->current_trace->start(tr);
6194 } else {
6195 tracer_tracing_off(tr);
6196 if (tr->current_trace->stop)
6197 tr->current_trace->stop(tr);
6198 }
6199 mutex_unlock(&trace_types_lock);
6200 }
6201
6202 (*ppos)++;
6203
6204 return cnt;
6205 }
6206
6207 static const struct file_operations rb_simple_fops = {
6208 .open = tracing_open_generic_tr,
6209 .read = rb_simple_read,
6210 .write = rb_simple_write,
6211 .release = tracing_release_generic_tr,
6212 .llseek = default_llseek,
6213 };
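
/*
 * Illustrative sketch, not part of the original file: "tracing_on" is the
 * per-instance switch served by rb_simple_read()/rb_simple_write() above;
 * writing '0' calls tracer_tracing_off() plus the tracer's ->stop() hook,
 * writing '1' calls tracer_tracing_on() plus ->start().  A minimal
 * user-space example, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "0", 1);
 *		write(fd, "1", 1);
 *		close(fd);
 *		return 0;
 *	}
 */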
6214
6215 struct dentry *trace_instance_dir;
6216
6217 static void
6218 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6219
6220 static int
6221 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6222 {
6223 enum ring_buffer_flags rb_flags;
6224
6225 rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6226
6227 buf->tr = tr;
6228
6229 buf->buffer = ring_buffer_alloc(size, rb_flags);
6230 if (!buf->buffer)
6231 return -ENOMEM;
6232
6233 buf->data = alloc_percpu(struct trace_array_cpu);
6234 if (!buf->data) {
6235 		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;	/* avoid a stale pointer and double free in free_trace_buffer() */
6236 		return -ENOMEM;
6237 }
6238
6239 /* Allocate the first page for all buffers */
6240 set_buffer_entries(&tr->trace_buffer,
6241 ring_buffer_size(tr->trace_buffer.buffer, 0));
6242
6243 return 0;
6244 }
6245
6246 static int allocate_trace_buffers(struct trace_array *tr, int size)
6247 {
6248 int ret;
6249
6250 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6251 if (ret)
6252 return ret;
6253
6254 #ifdef CONFIG_TRACER_MAX_TRACE
6255 ret = allocate_trace_buffer(tr, &tr->max_buffer,
6256 allocate_snapshot ? size : 1);
6257 if (WARN_ON(ret)) {
6258 		ring_buffer_free(tr->trace_buffer.buffer);
		tr->trace_buffer.buffer = NULL;	/* avoid a double free in free_trace_buffer() */
6259 		free_percpu(tr->trace_buffer.data);
		tr->trace_buffer.data = NULL;
6260 return -ENOMEM;
6261 }
6262 tr->allocated_snapshot = allocate_snapshot;
6263
6264 /*
6265 * Only the top level trace array gets its snapshot allocated
6266 * from the kernel command line.
6267 */
6268 allocate_snapshot = false;
6269 #endif
6270 return 0;
6271 }
6272
6273 static void free_trace_buffer(struct trace_buffer *buf)
6274 {
6275 if (buf->buffer) {
6276 ring_buffer_free(buf->buffer);
6277 buf->buffer = NULL;
6278 free_percpu(buf->data);
6279 buf->data = NULL;
6280 }
6281 }
6282
6283 static void free_trace_buffers(struct trace_array *tr)
6284 {
6285 if (!tr)
6286 return;
6287
6288 free_trace_buffer(&tr->trace_buffer);
6289
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291 free_trace_buffer(&tr->max_buffer);
6292 #endif
6293 }
6294
6295 static int instance_mkdir(const char *name)
6296 {
6297 struct trace_array *tr;
6298 int ret;
6299
6300 mutex_lock(&trace_types_lock);
6301
6302 ret = -EEXIST;
6303 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6304 if (tr->name && strcmp(tr->name, name) == 0)
6305 goto out_unlock;
6306 }
6307
6308 ret = -ENOMEM;
6309 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6310 if (!tr)
6311 goto out_unlock;
6312
6313 tr->name = kstrdup(name, GFP_KERNEL);
6314 if (!tr->name)
6315 goto out_free_tr;
6316
6317 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6318 goto out_free_tr;
6319
6320 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6321
6322 raw_spin_lock_init(&tr->start_lock);
6323
6324 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6325
6326 tr->current_trace = &nop_trace;
6327
6328 INIT_LIST_HEAD(&tr->systems);
6329 INIT_LIST_HEAD(&tr->events);
6330
6331 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6332 goto out_free_tr;
6333
6334 tr->dir = tracefs_create_dir(name, trace_instance_dir);
6335 if (!tr->dir)
6336 goto out_free_tr;
6337
6338 ret = event_trace_add_tracer(tr->dir, tr);
6339 if (ret) {
6340 tracefs_remove_recursive(tr->dir);
6341 goto out_free_tr;
6342 }
6343
6344 init_tracer_tracefs(tr, tr->dir);
6345
6346 list_add(&tr->list, &ftrace_trace_arrays);
6347
6348 mutex_unlock(&trace_types_lock);
6349
6350 return 0;
6351
6352 out_free_tr:
6353 free_trace_buffers(tr);
6354 free_cpumask_var(tr->tracing_cpumask);
6355 kfree(tr->name);
6356 kfree(tr);
6357
6358 out_unlock:
6359 mutex_unlock(&trace_types_lock);
6360
6361 return ret;
6362
6363 }
6364
6365 static int instance_rmdir(const char *name)
6366 {
6367 struct trace_array *tr;
6368 int found = 0;
6369 int ret;
6370
6371 mutex_lock(&trace_types_lock);
6372
6373 ret = -ENODEV;
6374 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6375 if (tr->name && strcmp(tr->name, name) == 0) {
6376 found = 1;
6377 break;
6378 }
6379 }
6380 if (!found)
6381 goto out_unlock;
6382
6383 ret = -EBUSY;
6384 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6385 goto out_unlock;
6386
6387 list_del(&tr->list);
6388
6389 tracing_set_nop(tr);
6390 event_trace_del_tracer(tr);
6391 ftrace_destroy_function_files(tr);
6392 debugfs_remove_recursive(tr->dir);
6393 free_trace_buffers(tr);
6394
6395 kfree(tr->name);
6396 kfree(tr);
6397
6398 ret = 0;
6399
6400 out_unlock:
6401 mutex_unlock(&trace_types_lock);
6402
6403 return ret;
6404 }
6405
6406 static __init void create_trace_instances(struct dentry *d_tracer)
6407 {
6408 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6409 instance_mkdir,
6410 instance_rmdir);
6411 if (WARN_ON(!trace_instance_dir))
6412 return;
6413 }
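
/*
 * Illustrative sketch, not part of the original file: the "instances"
 * directory registered above routes mkdir(2) and rmdir(2) to
 * instance_mkdir() and instance_rmdir().  A minimal user-space example,
 * assuming tracefs is mounted at /sys/kernel/tracing and using the instance
 * name "foo" purely as an example; between the two calls the new instance
 * has its own trace, trace_pipe, tracing_on, etc. under instances/foo/:
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		if (mkdir("/sys/kernel/tracing/instances/foo", 0755) < 0)
 *			return 1;
 *		if (rmdir("/sys/kernel/tracing/instances/foo") < 0)
 *			return 1;
 *		return 0;
 *	}
 */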
6414
6415 static void
6416 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6417 {
6418 int cpu;
6419
6420 trace_create_file("available_tracers", 0444, d_tracer,
6421 tr, &show_traces_fops);
6422
6423 trace_create_file("current_tracer", 0644, d_tracer,
6424 tr, &set_tracer_fops);
6425
6426 trace_create_file("tracing_cpumask", 0644, d_tracer,
6427 tr, &tracing_cpumask_fops);
6428
6429 trace_create_file("trace_options", 0644, d_tracer,
6430 tr, &tracing_iter_fops);
6431
6432 trace_create_file("trace", 0644, d_tracer,
6433 tr, &tracing_fops);
6434
6435 trace_create_file("trace_pipe", 0444, d_tracer,
6436 tr, &tracing_pipe_fops);
6437
6438 trace_create_file("buffer_size_kb", 0644, d_tracer,
6439 tr, &tracing_entries_fops);
6440
6441 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6442 tr, &tracing_total_entries_fops);
6443
6444 trace_create_file("free_buffer", 0200, d_tracer,
6445 tr, &tracing_free_buffer_fops);
6446
6447 trace_create_file("trace_marker", 0220, d_tracer,
6448 tr, &tracing_mark_fops);
6449
6450 trace_create_file("trace_clock", 0644, d_tracer, tr,
6451 &trace_clock_fops);
6452
6453 trace_create_file("tracing_on", 0644, d_tracer,
6454 tr, &rb_simple_fops);
6455
6456 #ifdef CONFIG_TRACER_MAX_TRACE
6457 trace_create_file("tracing_max_latency", 0644, d_tracer,
6458 &tr->max_latency, &tracing_max_lat_fops);
6459 #endif
6460
6461 if (ftrace_create_function_files(tr, d_tracer))
6462 WARN(1, "Could not allocate function filter files");
6463
6464 #ifdef CONFIG_TRACER_SNAPSHOT
6465 trace_create_file("snapshot", 0644, d_tracer,
6466 tr, &snapshot_fops);
6467 #endif
6468
6469 for_each_tracing_cpu(cpu)
6470 tracing_init_tracefs_percpu(tr, cpu);
6471
6472 }
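
/*
 * Illustrative sketch, not part of the original file: the control files
 * created above are ordinary tracefs files.  A minimal user-space example
 * that selects the "nop" tracer through "current_tracer" and reads the
 * start of the "trace" file, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[256];
 *		ssize_t n;
 *		int fd;
 *
 *		fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "nop", 3);
 *		close(fd);
 *
 *		fd = open("/sys/kernel/tracing/trace", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		n = read(fd, buf, sizeof(buf) - 1);
 *		if (n > 0) {
 *			buf[n] = '\0';
 *			fputs(buf, stdout);
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */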
6473
6474 static struct vfsmount *trace_automount(void *ignore)
6475 {
6476 struct vfsmount *mnt;
6477 struct file_system_type *type;
6478
6479 /*
6480 * To maintain backward compatibility for tools that mount
6481 * debugfs to get to the tracing facility, tracefs is automatically
6482 * mounted to the debugfs/tracing directory.
6483 */
6484 type = get_fs_type("tracefs");
6485 if (!type)
6486 return NULL;
6487 mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6488 put_filesystem(type);
6489 if (IS_ERR(mnt))
6490 return NULL;
6491 mntget(mnt);
6492
6493 return mnt;
6494 }
6495
6496 /**
6497 * tracing_init_dentry - initialize top level trace array
6498 *
6499 	 * This is called when creating files or directories in the tracing
6500 	 * directory. It is called via fs_initcall() from the boot up code and
6501 	 * returns the dentry of the top level tracing directory.
6502 */
6503 struct dentry *tracing_init_dentry(void)
6504 {
6505 struct trace_array *tr = &global_trace;
6506
6507 /* The top level trace array uses NULL as parent */
6508 if (tr->dir)
6509 return NULL;
6510
6511 if (WARN_ON(!debugfs_initialized()))
6512 return ERR_PTR(-ENODEV);
6513
6514 /*
6515 * As there may still be users that expect the tracing
6516 * files to exist in debugfs/tracing, we must automount
6517 * the tracefs file system there, so older tools still
6518 	 * work with the newer kernel.
6519 */
6520 tr->dir = debugfs_create_automount("tracing", NULL,
6521 trace_automount, NULL);
6522 if (!tr->dir) {
6523 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6524 return ERR_PTR(-ENOMEM);
6525 }
6526
6527 return NULL;
6528 }
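
/*
 * Illustrative sketch, not part of the original file: because of the
 * automount set up above, the same tracing files are reachable both at the
 * tracefs mount point and underneath debugfs/tracing.  A minimal user-space
 * probe, assuming the usual mount points:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		if (access("/sys/kernel/tracing/trace", F_OK) == 0)
 *			puts("tracefs mounted at /sys/kernel/tracing");
 *		if (access("/sys/kernel/debug/tracing/trace", F_OK) == 0)
 *			puts("tracing also visible through the debugfs automount");
 *		return 0;
 *	}
 */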
6529
6530 static __init int tracer_init_tracefs(void)
6531 {
6532 struct dentry *d_tracer;
6533
6534 trace_access_lock_init();
6535
6536 d_tracer = tracing_init_dentry();
6537 if (IS_ERR(d_tracer))
6538 return 0;
6539
6540 init_tracer_tracefs(&global_trace, d_tracer);
6541
6542 trace_create_file("tracing_thresh", 0644, d_tracer,
6543 &global_trace, &tracing_thresh_fops);
6544
6545 trace_create_file("README", 0444, d_tracer,
6546 NULL, &tracing_readme_fops);
6547
6548 trace_create_file("saved_cmdlines", 0444, d_tracer,
6549 NULL, &tracing_saved_cmdlines_fops);
6550
6551 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6552 NULL, &tracing_saved_cmdlines_size_fops);
6553
6554 #ifdef CONFIG_DYNAMIC_FTRACE
6555 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6556 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6557 #endif
6558
6559 create_trace_instances(d_tracer);
6560
6561 create_trace_options_dir(&global_trace);
6562
6563 /* If the tracer was started via cmdline, create options for it here */
6564 if (global_trace.current_trace != &nop_trace)
6565 update_tracer_options(&global_trace, global_trace.current_trace);
6566
6567 return 0;
6568 }
6569
6570 static int trace_panic_handler(struct notifier_block *this,
6571 unsigned long event, void *unused)
6572 {
6573 if (ftrace_dump_on_oops)
6574 ftrace_dump(ftrace_dump_on_oops);
6575 return NOTIFY_OK;
6576 }
6577
6578 static struct notifier_block trace_panic_notifier = {
6579 .notifier_call = trace_panic_handler,
6580 .next = NULL,
6581 .priority = 150 /* priority: INT_MAX >= x >= 0 */
6582 };
6583
6584 static int trace_die_handler(struct notifier_block *self,
6585 unsigned long val,
6586 void *data)
6587 {
6588 switch (val) {
6589 case DIE_OOPS:
6590 if (ftrace_dump_on_oops)
6591 ftrace_dump(ftrace_dump_on_oops);
6592 break;
6593 default:
6594 break;
6595 }
6596 return NOTIFY_OK;
6597 }
6598
6599 static struct notifier_block trace_die_notifier = {
6600 .notifier_call = trace_die_handler,
6601 .priority = 200
6602 };
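
/*
 * Illustrative sketch, not part of the original file: the panic and die
 * notifiers above only dump the buffers when ftrace_dump_on_oops is set,
 * either on the kernel command line ("ftrace_dump_on_oops") or at run time
 * through the sysctl of the same name.  A minimal user-space example,
 * assuming the usual /proc layout:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);
 *		close(fd);
 *		return 0;
 *	}
 */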
6603
6604 /*
6605 	 * printk is limited to a maximum of 1024 characters; we really don't need it that big.
6606 * Nothing should be printing 1000 characters anyway.
6607 */
6608 #define TRACE_MAX_PRINT 1000
6609
6610 /*
6611 	 * Define KERN_TRACE here so that we have one place to modify
6612 * it if we decide to change what log level the ftrace dump
6613 * should be at.
6614 */
6615 #define KERN_TRACE KERN_EMERG
6616
6617 void
6618 trace_printk_seq(struct trace_seq *s)
6619 {
6620 /* Probably should print a warning here. */
6621 if (s->seq.len >= TRACE_MAX_PRINT)
6622 s->seq.len = TRACE_MAX_PRINT;
6623
6624 /*
6625 * More paranoid code. Although the buffer size is set to
6626 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
6627 * an extra layer of protection.
6628 */
6629 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
6630 s->seq.len = s->seq.size - 1;
6631
6632 	/* Should be zero terminated, but we are paranoid. */
6633 s->buffer[s->seq.len] = 0;
6634
6635 printk(KERN_TRACE "%s", s->buffer);
6636
6637 trace_seq_init(s);
6638 }
6639
6640 void trace_init_global_iter(struct trace_iterator *iter)
6641 {
6642 iter->tr = &global_trace;
6643 iter->trace = iter->tr->current_trace;
6644 iter->cpu_file = RING_BUFFER_ALL_CPUS;
6645 iter->trace_buffer = &global_trace.trace_buffer;
6646
6647 if (iter->trace && iter->trace->open)
6648 iter->trace->open(iter);
6649
6650 /* Annotate start of buffers if we had overruns */
6651 if (ring_buffer_overruns(iter->trace_buffer->buffer))
6652 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6653
6654 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6655 if (trace_clocks[iter->tr->clock_id].in_ns)
6656 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6657 }
6658
6659 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6660 {
6661 /* use static because iter can be a bit big for the stack */
6662 static struct trace_iterator iter;
6663 static atomic_t dump_running;
6664 unsigned int old_userobj;
6665 unsigned long flags;
6666 int cnt = 0, cpu;
6667
6668 /* Only allow one dump user at a time. */
6669 if (atomic_inc_return(&dump_running) != 1) {
6670 atomic_dec(&dump_running);
6671 return;
6672 }
6673
6674 /*
6675 * Always turn off tracing when we dump.
6676 * We don't need to show trace output of what happens
6677 * between multiple crashes.
6678 *
6679 * If the user does a sysrq-z, then they can re-enable
6680 * tracing with echo 1 > tracing_on.
6681 */
6682 tracing_off();
6683
6684 local_irq_save(flags);
6685
6686 /* Simulate the iterator */
6687 trace_init_global_iter(&iter);
6688
6689 for_each_tracing_cpu(cpu) {
6690 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6691 }
6692
6693 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6694
6695 /* don't look at user memory in panic mode */
6696 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6697
6698 switch (oops_dump_mode) {
6699 case DUMP_ALL:
6700 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6701 break;
6702 case DUMP_ORIG:
6703 iter.cpu_file = raw_smp_processor_id();
6704 break;
6705 case DUMP_NONE:
6706 goto out_enable;
6707 default:
6708 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6709 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6710 }
6711
6712 printk(KERN_TRACE "Dumping ftrace buffer:\n");
6713
6714 /* Did function tracer already get disabled? */
6715 if (ftrace_is_dead()) {
6716 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6717 printk("# MAY BE MISSING FUNCTION EVENTS\n");
6718 }
6719
6720 /*
6721 	 * We need to stop all tracing on all CPUs to read the
6722 	 * next buffer. This is a bit expensive, but is
6723 	 * not done often. We fill in what we can read,
6724 * and then release the locks again.
6725 */
6726
6727 while (!trace_empty(&iter)) {
6728
6729 if (!cnt)
6730 printk(KERN_TRACE "---------------------------------\n");
6731
6732 cnt++;
6733
6734 /* reset all but tr, trace, and overruns */
6735 memset(&iter.seq, 0,
6736 sizeof(struct trace_iterator) -
6737 offsetof(struct trace_iterator, seq));
6738 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6739 iter.pos = -1;
6740
6741 if (trace_find_next_entry_inc(&iter) != NULL) {
6742 int ret;
6743
6744 ret = print_trace_line(&iter);
6745 if (ret != TRACE_TYPE_NO_CONSUME)
6746 trace_consume(&iter);
6747 }
6748 touch_nmi_watchdog();
6749
6750 trace_printk_seq(&iter.seq);
6751 }
6752
6753 if (!cnt)
6754 printk(KERN_TRACE " (ftrace buffer empty)\n");
6755 else
6756 printk(KERN_TRACE "---------------------------------\n");
6757
6758 out_enable:
6759 trace_flags |= old_userobj;
6760
6761 for_each_tracing_cpu(cpu) {
6762 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6763 }
6764 atomic_dec(&dump_running);
6765 local_irq_restore(flags);
6766 }
6767 EXPORT_SYMBOL_GPL(ftrace_dump);
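
/*
 * Illustrative sketch, not part of the original file: besides the oops and
 * panic paths, ftrace_dump() can be triggered by hand with sysrq-z, for
 * example by writing 'z' to /proc/sysrq-trigger (assuming sysrq is enabled):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/proc/sysrq-trigger", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "z", 1);
 *		close(fd);
 *		return 0;
 *	}
 */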
6768
6769 __init static int tracer_alloc_buffers(void)
6770 {
6771 int ring_buf_size;
6772 int ret = -ENOMEM;
6773
6774 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6775 goto out;
6776
6777 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6778 goto out_free_buffer_mask;
6779
6780 /* Only allocate trace_printk buffers if a trace_printk exists */
6781 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6782 /* Must be called before global_trace.buffer is allocated */
6783 trace_printk_init_buffers();
6784
6785 /* To save memory, keep the ring buffer size to its minimum */
6786 if (ring_buffer_expanded)
6787 ring_buf_size = trace_buf_size;
6788 else
6789 ring_buf_size = 1;
6790
6791 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6792 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6793
6794 raw_spin_lock_init(&global_trace.start_lock);
6795
6796 /* Used for event triggers */
6797 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6798 if (!temp_buffer)
6799 goto out_free_cpumask;
6800
6801 if (trace_create_savedcmd() < 0)
6802 goto out_free_temp_buffer;
6803
6804 	/* TODO: make the number of buffers hot pluggable with CPUs */
6805 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6806 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6807 WARN_ON(1);
6808 goto out_free_savedcmd;
6809 }
6810
6811 if (global_trace.buffer_disabled)
6812 tracing_off();
6813
6814 if (trace_boot_clock) {
6815 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6816 if (ret < 0)
6817 pr_warning("Trace clock %s not defined, going back to default\n",
6818 trace_boot_clock);
6819 }
6820
6821 /*
6822 * register_tracer() might reference current_trace, so it
6823 * needs to be set before we register anything. This is
6824 * just a bootstrap of current_trace anyway.
6825 */
6826 global_trace.current_trace = &nop_trace;
6827
6828 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6829
6830 ftrace_init_global_array_ops(&global_trace);
6831
6832 register_tracer(&nop_trace);
6833
6834 /* All seems OK, enable tracing */
6835 tracing_disabled = 0;
6836
6837 atomic_notifier_chain_register(&panic_notifier_list,
6838 &trace_panic_notifier);
6839
6840 register_die_notifier(&trace_die_notifier);
6841
6842 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6843
6844 INIT_LIST_HEAD(&global_trace.systems);
6845 INIT_LIST_HEAD(&global_trace.events);
6846 list_add(&global_trace.list, &ftrace_trace_arrays);
6847
6848 while (trace_boot_options) {
6849 char *option;
6850
6851 option = strsep(&trace_boot_options, ",");
6852 trace_set_options(&global_trace, option);
6853 }
6854
6855 register_snapshot_cmd();
6856
6857 return 0;
6858
6859 out_free_savedcmd:
6860 free_saved_cmdlines_buffer(savedcmd);
6861 out_free_temp_buffer:
6862 ring_buffer_free(temp_buffer);
6863 out_free_cpumask:
6864 free_cpumask_var(global_trace.tracing_cpumask);
6865 out_free_buffer_mask:
6866 free_cpumask_var(tracing_buffer_mask);
6867 out:
6868 return ret;
6869 }
6870
6871 void __init trace_init(void)
6872 {
6873 if (tracepoint_printk) {
6874 tracepoint_print_iter =
6875 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
6876 if (WARN_ON(!tracepoint_print_iter))
6877 tracepoint_printk = 0;
6878 }
6879 tracer_alloc_buffers();
6880 trace_event_init();
6881 }
6882
6883 __init static int clear_boot_tracer(void)
6884 {
6885 /*
6886 	 * The default tracer's name, set at boot, points into a buffer that
6887 	 * lives in an init section. This function is called at late_initcall
6888 	 * time. If the boot tracer was not registered by then, clear the
6889 	 * pointer out, to prevent a later registration from accessing the
6890 	 * buffer that is about to be freed.
6891 */
6892 if (!default_bootup_tracer)
6893 return 0;
6894
6895 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6896 default_bootup_tracer);
6897 default_bootup_tracer = NULL;
6898
6899 return 0;
6900 }
6901
6902 fs_initcall(tracer_init_tracefs);
6903 late_initcall(clear_boot_tracer);