1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57 * On boot up, the ring buffer is set to the minimum size, so that
58 * we do not waste memory on systems that are not using tracing.
59 */
60 bool ring_buffer_expanded;
61
62 /*
63 * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the entries
65  * inserted during the selftest, although concurrent insertions into
66  * the ring-buffer, such as trace_printk(), could have occurred at
67  * the same time, giving false positive or negative results.
68 */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 if (!tracing_selftest_disabled) {
81 tracing_selftest_disabled = true;
82 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 return 0;
102 }
103
104 /*
105 * To prevent the comm cache from being overwritten when no
106 * tracing is active, only save the comm when a trace event
107 * occurred.
108 */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112 * Kill all tracing for good (never come back).
113  * It is initialized to 1 and is set to zero if the initialization
114  * of the tracer is successful; that is the only place that sets
115  * it back to zero.
116 */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly tracing_buffer_mask;
120
121 /*
122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123 *
124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125 * is set, then ftrace_dump is called. This will output the contents
126 * of the ftrace buffers to the console. This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135 */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 struct module *mod;
146 unsigned long length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152 /*
153 * "end" is first and points to NULL as it must be different
154 * than "mod" or "eval_string"
155 */
156 union trace_eval_map_item *next;
157 const char *end; /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163 * The trace_eval_maps are saved in an array with two extra elements,
164 * one at the beginning, and one at the end. The beginning item contains
165 * the count of the saved maps (head.length), and the module they
166 * belong to if not built in (head.mod). The ending item contains a
167 * pointer to the next array of saved eval_map items.
168 */
169 union trace_eval_map_item {
170 struct trace_eval_map map;
171 struct trace_eval_map_head head;
172 struct trace_eval_map_tail tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 struct trace_buffer *buffer,
181 unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE 100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 default_bootup_tracer = bootup_tracer_buf;
193 /* We are using ftrace early, expand it */
194 ring_buffer_expanded = true;
195 return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 ftrace_dump_on_oops = DUMP_ALL;
203 return 1;
204 }
205
206 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 ftrace_dump_on_oops = DUMP_ORIG;
208 return 1;
209 }
210
211 return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 __disable_trace_on_warning = 1;
219 return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 allocate_snapshot = true;
226 /* We also need the main ring buffer expanded */
227 ring_buffer_expanded = true;
228 return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 return 1;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 trace_boot_clock = trace_boot_clock_buf;
249 return 1;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255 /* Ignore the "tp_printk_stop_on_boot" param */
256 if (*str == '_')
257 return 0;
258
259 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260 tracepoint_printk = 1;
261 return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264
265 static int __init set_tracepoint_printk_stop(char *str)
266 {
267 tracepoint_printk_stop_on_boot = true;
268 return 1;
269 }
270 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
271
272 unsigned long long ns2usecs(u64 nsec)
273 {
274 nsec += 500;
275 do_div(nsec, 1000);
276 return nsec;
277 }
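/*
 * Example: ns2usecs() rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */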
278
279 static void
280 trace_process_export(struct trace_export *export,
281 struct ring_buffer_event *event, int flag)
282 {
283 struct trace_entry *entry;
284 unsigned int size = 0;
285
286 if (export->flags & flag) {
287 entry = ring_buffer_event_data(event);
288 size = ring_buffer_event_length(event);
289 export->write(export, entry, size);
290 }
291 }
292
293 static DEFINE_MUTEX(ftrace_export_lock);
294
295 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
296
297 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
298 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
299 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
300
301 static inline void ftrace_exports_enable(struct trace_export *export)
302 {
303 if (export->flags & TRACE_EXPORT_FUNCTION)
304 static_branch_inc(&trace_function_exports_enabled);
305
306 if (export->flags & TRACE_EXPORT_EVENT)
307 static_branch_inc(&trace_event_exports_enabled);
308
309 if (export->flags & TRACE_EXPORT_MARKER)
310 static_branch_inc(&trace_marker_exports_enabled);
311 }
312
313 static inline void ftrace_exports_disable(struct trace_export *export)
314 {
315 if (export->flags & TRACE_EXPORT_FUNCTION)
316 static_branch_dec(&trace_function_exports_enabled);
317
318 if (export->flags & TRACE_EXPORT_EVENT)
319 static_branch_dec(&trace_event_exports_enabled);
320
321 if (export->flags & TRACE_EXPORT_MARKER)
322 static_branch_dec(&trace_marker_exports_enabled);
323 }
324
325 static void ftrace_exports(struct ring_buffer_event *event, int flag)
326 {
327 struct trace_export *export;
328
329 preempt_disable_notrace();
330
331 export = rcu_dereference_raw_check(ftrace_exports_list);
332 while (export) {
333 trace_process_export(export, event, flag);
334 export = rcu_dereference_raw_check(export->next);
335 }
336
337 preempt_enable_notrace();
338 }
339
340 static inline void
341 add_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 rcu_assign_pointer(export->next, *list);
344 /*
345  * We are adding the export to the list, but another
346  * CPU might be walking that list. We need to make sure
347  * the export->next pointer is valid before another CPU sees
348  * the export pointer included in the list.
349 */
350 rcu_assign_pointer(*list, export);
351 }
352
353 static inline int
354 rm_trace_export(struct trace_export **list, struct trace_export *export)
355 {
356 struct trace_export **p;
357
358 for (p = list; *p != NULL; p = &(*p)->next)
359 if (*p == export)
360 break;
361
362 if (*p != export)
363 return -1;
364
365 rcu_assign_pointer(*p, (*p)->next);
366
367 return 0;
368 }
369
370 static inline void
371 add_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373 ftrace_exports_enable(export);
374
375 add_trace_export(list, export);
376 }
377
378 static inline int
379 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
380 {
381 int ret;
382
383 ret = rm_trace_export(list, export);
384 ftrace_exports_disable(export);
385
386 return ret;
387 }
388
389 int register_ftrace_export(struct trace_export *export)
390 {
391 if (WARN_ON_ONCE(!export->write))
392 return -1;
393
394 mutex_lock(&ftrace_export_lock);
395
396 add_ftrace_export(&ftrace_exports_list, export);
397
398 mutex_unlock(&ftrace_export_lock);
399
400 return 0;
401 }
402 EXPORT_SYMBOL_GPL(register_ftrace_export);
403
404 int unregister_ftrace_export(struct trace_export *export)
405 {
406 int ret;
407
408 mutex_lock(&ftrace_export_lock);
409
410 ret = rm_ftrace_export(&ftrace_exports_list, export);
411
412 mutex_unlock(&ftrace_export_lock);
413
414 return ret;
415 }
416 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
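/*
 * Minimal usage sketch for the trace_export interface above. The names
 * below are hypothetical and the write() prototype is assumed to match
 * struct trace_export in <linux/trace.h>:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u byte trace entry\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */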
417
418 /* trace_flags holds trace_options default values */
419 #define TRACE_DEFAULT_FLAGS \
420 (FUNCTION_DEFAULT_FLAGS | \
421 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
422 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
423 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
424 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
425 TRACE_ITER_HASH_PTR)
426
427 /* trace_options that are only supported by global_trace */
428 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
429 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
430
431 /* trace_flags that are default zero for instances */
432 #define ZEROED_TRACE_FLAGS \
433 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
434
435 /*
436 * The global_trace is the descriptor that holds the top-level tracing
437 * buffers for the live tracing.
438 */
439 static struct trace_array global_trace = {
440 .trace_flags = TRACE_DEFAULT_FLAGS,
441 };
442
443 LIST_HEAD(ftrace_trace_arrays);
444
445 int trace_array_get(struct trace_array *this_tr)
446 {
447 struct trace_array *tr;
448 int ret = -ENODEV;
449
450 mutex_lock(&trace_types_lock);
451 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
452 if (tr == this_tr) {
453 tr->ref++;
454 ret = 0;
455 break;
456 }
457 }
458 mutex_unlock(&trace_types_lock);
459
460 return ret;
461 }
462
463 static void __trace_array_put(struct trace_array *this_tr)
464 {
465 WARN_ON(!this_tr->ref);
466 this_tr->ref--;
467 }
468
469 /**
470 * trace_array_put - Decrement the reference counter for this trace array.
471 * @this_tr : pointer to the trace array
472 *
473 * NOTE: Use this when we no longer need the trace array returned by
474 * trace_array_get_by_name(). This ensures the trace array can be later
475 * destroyed.
476 *
477 */
478 void trace_array_put(struct trace_array *this_tr)
479 {
480 if (!this_tr)
481 return;
482
483 mutex_lock(&trace_types_lock);
484 __trace_array_put(this_tr);
485 mutex_unlock(&trace_types_lock);
486 }
487 EXPORT_SYMBOL_GPL(trace_array_put);
488
489 int tracing_check_open_get_tr(struct trace_array *tr)
490 {
491 int ret;
492
493 ret = security_locked_down(LOCKDOWN_TRACEFS);
494 if (ret)
495 return ret;
496
497 if (tracing_disabled)
498 return -ENODEV;
499
500 if (tr && trace_array_get(tr) < 0)
501 return -ENODEV;
502
503 return 0;
504 }
505
506 int call_filter_check_discard(struct trace_event_call *call, void *rec,
507 struct trace_buffer *buffer,
508 struct ring_buffer_event *event)
509 {
510 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
511 !filter_match_preds(call->filter, rec)) {
512 __trace_event_discard_commit(buffer, event);
513 return 1;
514 }
515
516 return 0;
517 }
518
519 void trace_free_pid_list(struct trace_pid_list *pid_list)
520 {
521 vfree(pid_list->pids);
522 kfree(pid_list);
523 }
524
525 /**
526 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
527 * @filtered_pids: The list of pids to check
528 * @search_pid: The PID to find in @filtered_pids
529 *
530 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
531 */
532 bool
533 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
534 {
535 /*
536 * If pid_max changed after filtered_pids was created, we
537 * by default ignore all pids greater than the previous pid_max.
538 */
539 if (search_pid >= filtered_pids->pid_max)
540 return false;
541
542 return test_bit(search_pid, filtered_pids->pids);
543 }
544
545 /**
546 * trace_ignore_this_task - should a task be ignored for tracing
547 * @filtered_pids: The list of pids to check
548 * @filtered_no_pids: The list of pids not to be traced
549 * @task: The task that should be ignored if not filtered
550 *
551 * Checks if @task should be traced or not from @filtered_pids.
552 * Returns true if @task should *NOT* be traced.
553 * Returns false if @task should be traced.
554 */
555 bool
556 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
557 struct trace_pid_list *filtered_no_pids,
558 struct task_struct *task)
559 {
560 /*
561 * If filtered_no_pids is not empty, and the task's pid is listed
562 * in filtered_no_pids, then return true.
563 * Otherwise, if filtered_pids is empty, that means we can
564 * trace all tasks. If it has content, then only trace pids
565 * within filtered_pids.
566 */
567
568 return (filtered_pids &&
569 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
570 (filtered_no_pids &&
571 trace_find_filtered_pid(filtered_no_pids, task->pid));
572 }
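/*
 * Typical use (illustrative): a tracing hook can bail out early for
 * filtered tasks:
 *
 *	if (trace_ignore_this_task(pid_list, no_pid_list, current))
 *		return;
 */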
573
574 /**
575 * trace_filter_add_remove_task - Add or remove a task from a pid_list
576 * @pid_list: The list to modify
577 * @self: The current task for fork or NULL for exit
578 * @task: The task to add or remove
579 *
580 * If adding a task, if @self is defined, the task is only added if @self
581 * is also included in @pid_list. This happens on fork and tasks should
582 * only be added when the parent is listed. If @self is NULL, then the
583 * @task pid will be removed from the list, which would happen on exit
584 * of a task.
585 */
586 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
587 struct task_struct *self,
588 struct task_struct *task)
589 {
590 if (!pid_list)
591 return;
592
593 /* For forks, we only add if the forking task is listed */
594 if (self) {
595 if (!trace_find_filtered_pid(pid_list, self->pid))
596 return;
597 }
598
599 /* Sorry, but we don't support pid_max changing after setting */
600 if (task->pid >= pid_list->pid_max)
601 return;
602
603 /* "self" is set for forks, and NULL for exits */
604 if (self)
605 set_bit(task->pid, pid_list->pids);
606 else
607 clear_bit(task->pid, pid_list->pids);
608 }
609
610 /**
611 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
612 * @pid_list: The pid list to show
613 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
614 * @pos: The position of the file
615 *
616 * This is used by the seq_file "next" operation to iterate the pids
617 * listed in a trace_pid_list structure.
618 *
619 * Returns the pid+1 as we want to display pid of zero, but NULL would
620 * stop the iteration.
621 */
622 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
623 {
624 unsigned long pid = (unsigned long)v;
625
626 (*pos)++;
627
628 /* pid already is +1 of the actual previous bit */
629 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
630
631 /* Return pid + 1 to allow zero to be represented */
632 if (pid < pid_list->pid_max)
633 return (void *)(pid + 1);
634
635 return NULL;
636 }
637
638 /**
639 * trace_pid_start - Used for seq_file to start reading pid lists
640 * @pid_list: The pid list to show
641 * @pos: The position of the file
642 *
643 * This is used by seq_file "start" operation to start the iteration
644 * of listing pids.
645 *
646 * Returns the pid+1 as we want to display pid of zero, but NULL would
647 * stop the iteration.
648 */
649 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
650 {
651 unsigned long pid;
652 loff_t l = 0;
653
654 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
655 if (pid >= pid_list->pid_max)
656 return NULL;
657
658 /* Return pid + 1 so that zero can be the exit value */
659 for (pid++; pid && l < *pos;
660 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
661 ;
662 return (void *)pid;
663 }
664
665 /**
666 * trace_pid_show - show the current pid in seq_file processing
667 * @m: The seq_file structure to write into
668 * @v: A void pointer of the pid (+1) value to display
669 *
670 * Can be directly used by seq_file operations to display the current
671 * pid value.
672 */
673 int trace_pid_show(struct seq_file *m, void *v)
674 {
675 unsigned long pid = (unsigned long)v - 1;
676
677 seq_printf(m, "%lu\n", pid);
678 return 0;
679 }
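/*
 * Illustrative sketch of wiring the three helpers above into a seq_file.
 * The names are hypothetical and the pid_list lookup (and a p_stop()
 * callback) are left out:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_sops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */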
680
681 /* 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
682 #define PID_BUF_SIZE 127
683
684 int trace_pid_write(struct trace_pid_list *filtered_pids,
685 struct trace_pid_list **new_pid_list,
686 const char __user *ubuf, size_t cnt)
687 {
688 struct trace_pid_list *pid_list;
689 struct trace_parser parser;
690 unsigned long val;
691 int nr_pids = 0;
692 ssize_t read = 0;
693 ssize_t ret = 0;
694 loff_t pos;
695 pid_t pid;
696
697 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
698 return -ENOMEM;
699
700 /*
701  * The write is an all or nothing operation: always create a new
702  * array when the user adds new pids, rather than modifying the
703  * current one in place. If the operation fails, then the current
704  * list is not modified.
705 */
706 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
707 if (!pid_list) {
708 trace_parser_put(&parser);
709 return -ENOMEM;
710 }
711
712 pid_list->pid_max = READ_ONCE(pid_max);
713
714 /* Only truncating will shrink pid_max */
715 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
716 pid_list->pid_max = filtered_pids->pid_max;
717
718 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
719 if (!pid_list->pids) {
720 trace_parser_put(&parser);
721 kfree(pid_list);
722 return -ENOMEM;
723 }
724
725 if (filtered_pids) {
726 /* copy the current bits to the new max */
727 for_each_set_bit(pid, filtered_pids->pids,
728 filtered_pids->pid_max) {
729 set_bit(pid, pid_list->pids);
730 nr_pids++;
731 }
732 }
733
734 while (cnt > 0) {
735
736 pos = 0;
737
738 ret = trace_get_user(&parser, ubuf, cnt, &pos);
739 if (ret < 0 || !trace_parser_loaded(&parser))
740 break;
741
742 read += ret;
743 ubuf += ret;
744 cnt -= ret;
745
746 ret = -EINVAL;
747 if (kstrtoul(parser.buffer, 0, &val))
748 break;
749 if (val >= pid_list->pid_max)
750 break;
751
752 pid = (pid_t)val;
753
754 set_bit(pid, pid_list->pids);
755 nr_pids++;
756
757 trace_parser_clear(&parser);
758 ret = 0;
759 }
760 trace_parser_put(&parser);
761
762 if (ret < 0) {
763 trace_free_pid_list(pid_list);
764 return ret;
765 }
766
767 if (!nr_pids) {
768 /* Cleared the list of pids */
769 trace_free_pid_list(pid_list);
770 read = ret;
771 pid_list = NULL;
772 }
773
774 *new_pid_list = pid_list;
775
776 return read;
777 }
778
779 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
780 {
781 u64 ts;
782
783 /* Early boot up does not have a buffer yet */
784 if (!buf->buffer)
785 return trace_clock_local();
786
787 ts = ring_buffer_time_stamp(buf->buffer);
788 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
789
790 return ts;
791 }
792
793 u64 ftrace_now(int cpu)
794 {
795 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
796 }
797
798 /**
799 * tracing_is_enabled - Show if global_trace has been enabled
800 *
801 * Shows if the global trace has been enabled or not. It uses the
802 * mirror flag "buffer_disabled" to be used in fast paths such as for
803 * the irqsoff tracer. But it may be inaccurate due to races. If you
804 * need to know the accurate state, use tracing_is_on() which is a little
805 * slower, but accurate.
806 */
807 int tracing_is_enabled(void)
808 {
809 /*
810 * For quick access (irqsoff uses this in fast path), just
811 * return the mirror variable of the state of the ring buffer.
812 * It's a little racy, but we don't really care.
813 */
814 smp_rmb();
815 return !global_trace.buffer_disabled;
816 }
817
818 /*
819 * trace_buf_size is the size in bytes that is allocated
820 * for a buffer. Note, the number of bytes is always rounded
821 * to page size.
822 *
823  * This number is purposely set to a low value of 16384.
824  * If a dump on oops happens, it is much appreciated not to have
825  * to wait for all that output. Anyway, this is configurable at
826  * both boot time and run time.
827 */
828 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
829
830 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
831
832 /* trace_types holds a link list of available tracers. */
833 static struct tracer *trace_types __read_mostly;
834
835 /*
836 * trace_types_lock is used to protect the trace_types list.
837 */
838 DEFINE_MUTEX(trace_types_lock);
839
840 /*
841  * Serialize access to the ring buffer.
842  *
843  * The ring buffer serializes readers, but that is only low-level
844  * protection. The validity of the events (returned by ring_buffer_peek()
845  * etc.) is not protected by the ring buffer.
846  *
847  * The content of events may become garbage if we allow other processes
848  * to consume these events concurrently:
849  * A) the page of the consumed events may become a normal page
850  * (not a reader page) in the ring buffer, and this page will be
851  * rewritten by the events producer.
852  * B) the page of the consumed events may become a page for splice_read,
853  * and this page will be returned to the system.
854  *
855  * These primitives allow multiple processes to access different CPU
856  * ring buffers concurrently.
857  *
858  * These primitives don't distinguish read-only and read-consume access.
859  * Multiple read-only accesses are also serialized.
860 */
861
862 #ifdef CONFIG_SMP
863 static DECLARE_RWSEM(all_cpu_access_lock);
864 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
865
866 static inline void trace_access_lock(int cpu)
867 {
868 if (cpu == RING_BUFFER_ALL_CPUS) {
869 /* gain it for accessing the whole ring buffer. */
870 down_write(&all_cpu_access_lock);
871 } else {
872 /* gain it for accessing a cpu ring buffer. */
873
874 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
875 down_read(&all_cpu_access_lock);
876
877 /* Secondly block other access to this @cpu ring buffer. */
878 mutex_lock(&per_cpu(cpu_access_lock, cpu));
879 }
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884 if (cpu == RING_BUFFER_ALL_CPUS) {
885 up_write(&all_cpu_access_lock);
886 } else {
887 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
888 up_read(&all_cpu_access_lock);
889 }
890 }
891
892 static inline void trace_access_lock_init(void)
893 {
894 int cpu;
895
896 for_each_possible_cpu(cpu)
897 mutex_init(&per_cpu(cpu_access_lock, cpu));
898 }
899
900 #else
901
902 static DEFINE_MUTEX(access_lock);
903
904 static inline void trace_access_lock(int cpu)
905 {
906 (void)cpu;
907 mutex_lock(&access_lock);
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912 (void)cpu;
913 mutex_unlock(&access_lock);
914 }
915
916 static inline void trace_access_lock_init(void)
917 {
918 }
919
920 #endif
921
922 #ifdef CONFIG_STACKTRACE
923 static void __ftrace_trace_stack(struct trace_buffer *buffer,
924 unsigned int trace_ctx,
925 int skip, struct pt_regs *regs);
926 static inline void ftrace_trace_stack(struct trace_array *tr,
927 struct trace_buffer *buffer,
928 unsigned int trace_ctx,
929 int skip, struct pt_regs *regs);
930
931 #else
932 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
933 unsigned int trace_ctx,
934 int skip, struct pt_regs *regs)
935 {
936 }
937 static inline void ftrace_trace_stack(struct trace_array *tr,
938 struct trace_buffer *buffer,
939 unsigned long trace_ctx,
940 int skip, struct pt_regs *regs)
941 {
942 }
943
944 #endif
945
946 static __always_inline void
947 trace_event_setup(struct ring_buffer_event *event,
948 int type, unsigned int trace_ctx)
949 {
950 struct trace_entry *ent = ring_buffer_event_data(event);
951
952 tracing_generic_entry_update(ent, type, trace_ctx);
953 }
954
955 static __always_inline struct ring_buffer_event *
956 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
957 int type,
958 unsigned long len,
959 unsigned int trace_ctx)
960 {
961 struct ring_buffer_event *event;
962
963 event = ring_buffer_lock_reserve(buffer, len);
964 if (event != NULL)
965 trace_event_setup(event, type, trace_ctx);
966
967 return event;
968 }
969
970 void tracer_tracing_on(struct trace_array *tr)
971 {
972 if (tr->array_buffer.buffer)
973 ring_buffer_record_on(tr->array_buffer.buffer);
974 /*
975 * This flag is looked at when buffers haven't been allocated
976 * yet, or by some tracers (like irqsoff), that just want to
977 * know if the ring buffer has been disabled, but it can handle
978 * races of where it gets disabled but we still do a record.
979 * As the check is in the fast path of the tracers, it is more
980 * important to be fast than accurate.
981 */
982 tr->buffer_disabled = 0;
983 /* Make the flag seen by readers */
984 smp_wmb();
985 }
986
987 /**
988 * tracing_on - enable tracing buffers
989 *
990 * This function enables tracing buffers that may have been
991 * disabled with tracing_off.
992 */
993 void tracing_on(void)
994 {
995 tracer_tracing_on(&global_trace);
996 }
997 EXPORT_SYMBOL_GPL(tracing_on);
998
999
1000 static __always_inline void
1001 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1002 {
1003 __this_cpu_write(trace_taskinfo_save, true);
1004
1005 /* If this is the temp buffer, we need to commit fully */
1006 if (this_cpu_read(trace_buffered_event) == event) {
1007 /* Length is in event->array[0] */
1008 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1009 /* Release the temp buffer */
1010 this_cpu_dec(trace_buffered_event_cnt);
1011 } else
1012 ring_buffer_unlock_commit(buffer, event);
1013 }
1014
1015 /**
1016 * __trace_puts - write a constant string into the trace buffer.
1017 * @ip: The address of the caller
1018 * @str: The constant string to write
1019 * @size: The size of the string.
1020 */
1021 int __trace_puts(unsigned long ip, const char *str, int size)
1022 {
1023 struct ring_buffer_event *event;
1024 struct trace_buffer *buffer;
1025 struct print_entry *entry;
1026 unsigned int trace_ctx;
1027 int alloc;
1028
1029 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1030 return 0;
1031
1032 if (unlikely(tracing_selftest_running || tracing_disabled))
1033 return 0;
1034
1035 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1036
1037 trace_ctx = tracing_gen_ctx();
1038 buffer = global_trace.array_buffer.buffer;
1039 ring_buffer_nest_start(buffer);
1040 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1041 trace_ctx);
1042 if (!event) {
1043 size = 0;
1044 goto out;
1045 }
1046
1047 entry = ring_buffer_event_data(event);
1048 entry->ip = ip;
1049
1050 memcpy(&entry->buf, str, size);
1051
1052 /* Add a newline if necessary */
1053 if (entry->buf[size - 1] != '\n') {
1054 entry->buf[size] = '\n';
1055 entry->buf[size + 1] = '\0';
1056 } else
1057 entry->buf[size] = '\0';
1058
1059 __buffer_unlock_commit(buffer, event);
1060 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1061 out:
1062 ring_buffer_nest_end(buffer);
1063 return size;
1064 }
1065 EXPORT_SYMBOL_GPL(__trace_puts);
1066
1067 /**
1068 * __trace_bputs - write the pointer to a constant string into trace buffer
1069 * @ip: The address of the caller
1070 * @str: The constant string to write to the buffer to
1071 */
1072 int __trace_bputs(unsigned long ip, const char *str)
1073 {
1074 struct ring_buffer_event *event;
1075 struct trace_buffer *buffer;
1076 struct bputs_entry *entry;
1077 unsigned int trace_ctx;
1078 int size = sizeof(struct bputs_entry);
1079 int ret = 0;
1080
1081 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1082 return 0;
1083
1084 if (unlikely(tracing_selftest_running || tracing_disabled))
1085 return 0;
1086
1087 trace_ctx = tracing_gen_ctx();
1088 buffer = global_trace.array_buffer.buffer;
1089
1090 ring_buffer_nest_start(buffer);
1091 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1092 trace_ctx);
1093 if (!event)
1094 goto out;
1095
1096 entry = ring_buffer_event_data(event);
1097 entry->ip = ip;
1098 entry->str = str;
1099
1100 __buffer_unlock_commit(buffer, event);
1101 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1102
1103 ret = 1;
1104 out:
1105 ring_buffer_nest_end(buffer);
1106 return ret;
1107 }
1108 EXPORT_SYMBOL_GPL(__trace_bputs);
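/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly;
 * the trace_puts() macro (declared next to trace_printk()) picks one of
 * them depending on whether the string is a build-time constant:
 *
 *	trace_puts("reached the slow path\n");
 */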
1109
1110 #ifdef CONFIG_TRACER_SNAPSHOT
1111 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1112 void *cond_data)
1113 {
1114 struct tracer *tracer = tr->current_trace;
1115 unsigned long flags;
1116
1117 if (in_nmi()) {
1118 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1119 internal_trace_puts("*** snapshot is being ignored ***\n");
1120 return;
1121 }
1122
1123 if (!tr->allocated_snapshot) {
1124 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1125 internal_trace_puts("*** stopping trace here! ***\n");
1126 tracing_off();
1127 return;
1128 }
1129
1130 /* Note, snapshot can not be used when the tracer uses it */
1131 if (tracer->use_max_tr) {
1132 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1133 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1134 return;
1135 }
1136
1137 local_irq_save(flags);
1138 update_max_tr(tr, current, smp_processor_id(), cond_data);
1139 local_irq_restore(flags);
1140 }
1141
1142 void tracing_snapshot_instance(struct trace_array *tr)
1143 {
1144 tracing_snapshot_instance_cond(tr, NULL);
1145 }
1146
1147 /**
1148 * tracing_snapshot - take a snapshot of the current buffer.
1149 *
1150 * This causes a swap between the snapshot buffer and the current live
1151 * tracing buffer. You can use this to take snapshots of the live
1152 * trace when some condition is triggered, but continue to trace.
1153 *
1154 * Note, make sure to allocate the snapshot with either
1155 * a tracing_snapshot_alloc(), or by doing it manually
1156 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1157 *
1158 * If the snapshot buffer is not allocated, it will stop tracing.
1159 * Basically making a permanent snapshot.
1160 */
1161 void tracing_snapshot(void)
1162 {
1163 struct trace_array *tr = &global_trace;
1164
1165 tracing_snapshot_instance(tr);
1166 }
1167 EXPORT_SYMBOL_GPL(tracing_snapshot);
1168
1169 /**
1170 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1171 * @tr: The tracing instance to snapshot
1172 * @cond_data: The data to be tested conditionally, and possibly saved
1173 *
1174 * This is the same as tracing_snapshot() except that the snapshot is
1175 * conditional - the snapshot will only happen if the
1176 * cond_snapshot.update() implementation receiving the cond_data
1177 * returns true, which means that the trace array's cond_snapshot
1178 * update() operation used the cond_data to determine whether the
1179 * snapshot should be taken, and if it was, presumably saved it along
1180 * with the snapshot.
1181 */
1182 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1183 {
1184 tracing_snapshot_instance_cond(tr, cond_data);
1185 }
1186 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1187
1188 /**
1189 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1190 * @tr: The tracing instance
1191 *
1192 * When the user enables a conditional snapshot using
1193 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1194 * with the snapshot. This accessor is used to retrieve it.
1195 *
1196 * Should not be called from cond_snapshot.update(), since it takes
1197 * the tr->max_lock lock, which the code calling
1198 * cond_snapshot.update() has already done.
1199 *
1200 * Returns the cond_data associated with the trace array's snapshot.
1201 */
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204 void *cond_data = NULL;
1205
1206 arch_spin_lock(&tr->max_lock);
1207
1208 if (tr->cond_snapshot)
1209 cond_data = tr->cond_snapshot->cond_data;
1210
1211 arch_spin_unlock(&tr->max_lock);
1212
1213 return cond_data;
1214 }
1215 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1216
1217 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1218 struct array_buffer *size_buf, int cpu_id);
1219 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1220
1221 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1222 {
1223 int ret;
1224
1225 if (!tr->allocated_snapshot) {
1226
1227 /* allocate spare buffer */
1228 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1229 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1230 if (ret < 0)
1231 return ret;
1232
1233 tr->allocated_snapshot = true;
1234 }
1235
1236 return 0;
1237 }
1238
1239 static void free_snapshot(struct trace_array *tr)
1240 {
1241 /*
1242  * We don't free the ring buffer; instead, we resize it because
1243  * the max_tr ring buffer has some state (e.g. ring->clock) and
1244  * we want to preserve it.
1245 */
1246 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1247 set_buffer_entries(&tr->max_buffer, 1);
1248 tracing_reset_online_cpus(&tr->max_buffer);
1249 tr->allocated_snapshot = false;
1250 }
1251
1252 /**
1253 * tracing_alloc_snapshot - allocate snapshot buffer.
1254 *
1255 * This only allocates the snapshot buffer if it isn't already
1256 * allocated - it doesn't also take a snapshot.
1257 *
1258 * This is meant to be used in cases where the snapshot buffer needs
1259 * to be set up for events that can't sleep but need to be able to
1260 * trigger a snapshot.
1261 */
1262 int tracing_alloc_snapshot(void)
1263 {
1264 struct trace_array *tr = &global_trace;
1265 int ret;
1266
1267 ret = tracing_alloc_snapshot_instance(tr);
1268 WARN_ON(ret < 0);
1269
1270 return ret;
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1273
1274 /**
1275 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1276 *
1277 * This is similar to tracing_snapshot(), but it will allocate the
1278 * snapshot buffer if it isn't already allocated. Use this only
1279 * where it is safe to sleep, as the allocation may sleep.
1280 *
1281 * This causes a swap between the snapshot buffer and the current live
1282 * tracing buffer. You can use this to take snapshots of the live
1283 * trace when some condition is triggered, but continue to trace.
1284 */
1285 void tracing_snapshot_alloc(void)
1286 {
1287 int ret;
1288
1289 ret = tracing_alloc_snapshot();
1290 if (ret < 0)
1291 return;
1292
1293 tracing_snapshot();
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1296
1297 /**
1298 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1299 * @tr: The tracing instance
1300 * @cond_data: User data to associate with the snapshot
1301 * @update: Implementation of the cond_snapshot update function
1302 *
1303 * Check whether the conditional snapshot for the given instance has
1304 * already been enabled, or if the current tracer is already using a
1305 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1306 * save the cond_data and update function inside.
1307 *
1308 * Returns 0 if successful, error otherwise.
1309 */
1310 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1311 cond_update_fn_t update)
1312 {
1313 struct cond_snapshot *cond_snapshot;
1314 int ret = 0;
1315
1316 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1317 if (!cond_snapshot)
1318 return -ENOMEM;
1319
1320 cond_snapshot->cond_data = cond_data;
1321 cond_snapshot->update = update;
1322
1323 mutex_lock(&trace_types_lock);
1324
1325 ret = tracing_alloc_snapshot_instance(tr);
1326 if (ret)
1327 goto fail_unlock;
1328
1329 if (tr->current_trace->use_max_tr) {
1330 ret = -EBUSY;
1331 goto fail_unlock;
1332 }
1333
1334 /*
1335 * The cond_snapshot can only change to NULL without the
1336 * trace_types_lock. We don't care if we race with it going
1337 * to NULL, but we want to make sure that it's not set to
1338 * something other than NULL when we get here, which we can
1339 * do safely with only holding the trace_types_lock and not
1340 * having to take the max_lock.
1341 */
1342 if (tr->cond_snapshot) {
1343 ret = -EBUSY;
1344 goto fail_unlock;
1345 }
1346
1347 arch_spin_lock(&tr->max_lock);
1348 tr->cond_snapshot = cond_snapshot;
1349 arch_spin_unlock(&tr->max_lock);
1350
1351 mutex_unlock(&trace_types_lock);
1352
1353 return ret;
1354
1355 fail_unlock:
1356 mutex_unlock(&trace_types_lock);
1357 kfree(cond_snapshot);
1358 return ret;
1359 }
1360 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
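/*
 * Minimal usage sketch for conditional snapshots. The names below are
 * hypothetical and the callback is assumed to follow the cond_update_fn_t
 * prototype, bool (*)(struct trace_array *tr, void *cond_data):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s && s->threshold_crossed;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *
 * Afterwards, tracing_snapshot_cond(tr, &my_state) takes a snapshot only
 * when my_update() returns true, and tracing_snapshot_cond_disable(tr)
 * tears the condition down again.
 */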
1361
1362 /**
1363 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1364 * @tr: The tracing instance
1365 *
1366 * Check whether the conditional snapshot for the given instance is
1367 * enabled; if so, free the cond_snapshot associated with it,
1368 * otherwise return -EINVAL.
1369 *
1370 * Returns 0 if successful, error otherwise.
1371 */
1372 int tracing_snapshot_cond_disable(struct trace_array *tr)
1373 {
1374 int ret = 0;
1375
1376 arch_spin_lock(&tr->max_lock);
1377
1378 if (!tr->cond_snapshot)
1379 ret = -EINVAL;
1380 else {
1381 kfree(tr->cond_snapshot);
1382 tr->cond_snapshot = NULL;
1383 }
1384
1385 arch_spin_unlock(&tr->max_lock);
1386
1387 return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1390 #else
1391 void tracing_snapshot(void)
1392 {
1393 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot);
1396 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1397 {
1398 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1401 int tracing_alloc_snapshot(void)
1402 {
1403 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1404 return -ENODEV;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1407 void tracing_snapshot_alloc(void)
1408 {
1409 /* Give warning */
1410 tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413 void *tracing_cond_snapshot_data(struct trace_array *tr)
1414 {
1415 return NULL;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1418 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1419 {
1420 return -ENODEV;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1423 int tracing_snapshot_cond_disable(struct trace_array *tr)
1424 {
1425 return false;
1426 }
1427 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1428 #endif /* CONFIG_TRACER_SNAPSHOT */
1429
1430 void tracer_tracing_off(struct trace_array *tr)
1431 {
1432 if (tr->array_buffer.buffer)
1433 ring_buffer_record_off(tr->array_buffer.buffer);
1434 /*
1435 * This flag is looked at when buffers haven't been allocated
1436 * yet, or by some tracers (like irqsoff), that just want to
1437 * know if the ring buffer has been disabled, but it can handle
1438 * races of where it gets disabled but we still do a record.
1439 * As the check is in the fast path of the tracers, it is more
1440 * important to be fast than accurate.
1441 */
1442 tr->buffer_disabled = 1;
1443 /* Make the flag seen by readers */
1444 smp_wmb();
1445 }
1446
1447 /**
1448 * tracing_off - turn off tracing buffers
1449 *
1450 * This function stops the tracing buffers from recording data.
1451 * It does not disable any overhead the tracers themselves may
1452 * be causing. This function simply causes all recording to
1453 * the ring buffers to fail.
1454 */
1455 void tracing_off(void)
1456 {
1457 tracer_tracing_off(&global_trace);
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_off);
1460
1461 void disable_trace_on_warning(void)
1462 {
1463 if (__disable_trace_on_warning) {
1464 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1465 "Disabling tracing due to warning\n");
1466 tracing_off();
1467 }
1468 }
1469
1470 /**
1471 * tracer_tracing_is_on - show real state of ring buffer enabled
1472 * @tr : the trace array to know if ring buffer is enabled
1473 *
1474 * Shows real state of the ring buffer if it is enabled or not.
1475 */
1476 bool tracer_tracing_is_on(struct trace_array *tr)
1477 {
1478 if (tr->array_buffer.buffer)
1479 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1480 return !tr->buffer_disabled;
1481 }
1482
1483 /**
1484 * tracing_is_on - show state of ring buffers enabled
1485 */
1486 int tracing_is_on(void)
1487 {
1488 return tracer_tracing_is_on(&global_trace);
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_is_on);
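/*
 * Example (illustrative): kernel code can bracket a region of interest so
 * that the ring buffers stop recording right after it, preserving the
 * events of interest until they are read back:
 *
 *	tracing_on();
 *	do_something_interesting();
 *	tracing_off();
 */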
1491
1492 static int __init set_buf_size(char *str)
1493 {
1494 unsigned long buf_size;
1495
1496 if (!str)
1497 return 0;
1498 buf_size = memparse(str, &str);
1499 /*
1500 * nr_entries can not be zero and the startup
1501 * tests require some buffer space. Therefore
1502 * ensure we have at least 4096 bytes of buffer.
1503 */
1504 trace_buf_size = max(4096UL, buf_size);
1505 return 1;
1506 }
1507 __setup("trace_buf_size=", set_buf_size);
1508
1509 static int __init set_tracing_thresh(char *str)
1510 {
1511 unsigned long threshold;
1512 int ret;
1513
1514 if (!str)
1515 return 0;
1516 ret = kstrtoul(str, 0, &threshold);
1517 if (ret < 0)
1518 return 0;
1519 tracing_thresh = threshold * 1000;
1520 return 1;
1521 }
1522 __setup("tracing_thresh=", set_tracing_thresh);
1523
1524 unsigned long nsecs_to_usecs(unsigned long nsecs)
1525 {
1526 return nsecs / 1000;
1527 }
1528
1529 /*
1530 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1531 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1532 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1533 * of strings in the order that the evals (enum) were defined.
1534 */
1535 #undef C
1536 #define C(a, b) b
1537
1538 /* These must match the bit positions in trace_iterator_flags */
1539 static const char *trace_options[] = {
1540 TRACE_FLAGS
1541 NULL
1542 };
1543
1544 static struct {
1545 u64 (*func)(void);
1546 const char *name;
1547 int in_ns; /* is this clock in nanoseconds? */
1548 } trace_clocks[] = {
1549 { trace_clock_local, "local", 1 },
1550 { trace_clock_global, "global", 1 },
1551 { trace_clock_counter, "counter", 0 },
1552 { trace_clock_jiffies, "uptime", 0 },
1553 { trace_clock, "perf", 1 },
1554 { ktime_get_mono_fast_ns, "mono", 1 },
1555 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1556 { ktime_get_boot_fast_ns, "boot", 1 },
1557 ARCH_TRACE_CLOCKS
1558 };
1559
1560 bool trace_clock_in_ns(struct trace_array *tr)
1561 {
1562 if (trace_clocks[tr->clock_id].in_ns)
1563 return true;
1564
1565 return false;
1566 }
1567
1568 /*
1569 * trace_parser_get_init - gets the buffer for trace parser
1570 */
1571 int trace_parser_get_init(struct trace_parser *parser, int size)
1572 {
1573 memset(parser, 0, sizeof(*parser));
1574
1575 parser->buffer = kmalloc(size, GFP_KERNEL);
1576 if (!parser->buffer)
1577 return 1;
1578
1579 parser->size = size;
1580 return 0;
1581 }
1582
1583 /*
1584 * trace_parser_put - frees the buffer for trace parser
1585 */
1586 void trace_parser_put(struct trace_parser *parser)
1587 {
1588 kfree(parser->buffer);
1589 parser->buffer = NULL;
1590 }
1591
1592 /*
1593 * trace_get_user - reads the user input string separated by space
1594 * (matched by isspace(ch))
1595 *
1596 * For each string found the 'struct trace_parser' is updated,
1597 * and the function returns.
1598 *
1599 * Returns number of bytes read.
1600 *
1601 * See kernel/trace/trace.h for 'struct trace_parser' details.
1602 */
1603 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1604 size_t cnt, loff_t *ppos)
1605 {
1606 char ch;
1607 size_t read = 0;
1608 ssize_t ret;
1609
1610 if (!*ppos)
1611 trace_parser_clear(parser);
1612
1613 ret = get_user(ch, ubuf++);
1614 if (ret)
1615 goto out;
1616
1617 read++;
1618 cnt--;
1619
1620 /*
1621 * The parser is not finished with the last write,
1622 * continue reading the user input without skipping spaces.
1623 */
1624 if (!parser->cont) {
1625 /* skip white space */
1626 while (cnt && isspace(ch)) {
1627 ret = get_user(ch, ubuf++);
1628 if (ret)
1629 goto out;
1630 read++;
1631 cnt--;
1632 }
1633
1634 parser->idx = 0;
1635
1636 /* only spaces were written */
1637 if (isspace(ch) || !ch) {
1638 *ppos += read;
1639 ret = read;
1640 goto out;
1641 }
1642 }
1643
1644 /* read the non-space input */
1645 while (cnt && !isspace(ch) && ch) {
1646 if (parser->idx < parser->size - 1)
1647 parser->buffer[parser->idx++] = ch;
1648 else {
1649 ret = -EINVAL;
1650 goto out;
1651 }
1652 ret = get_user(ch, ubuf++);
1653 if (ret)
1654 goto out;
1655 read++;
1656 cnt--;
1657 }
1658
1659 /* We either got finished input or we have to wait for another call. */
1660 if (isspace(ch) || !ch) {
1661 parser->buffer[parser->idx] = 0;
1662 parser->cont = false;
1663 } else if (parser->idx < parser->size - 1) {
1664 parser->cont = true;
1665 parser->buffer[parser->idx++] = ch;
1666 /* Make sure the parsed string always terminates with '\0'. */
1667 parser->buffer[parser->idx] = 0;
1668 } else {
1669 ret = -EINVAL;
1670 goto out;
1671 }
1672
1673 *ppos += read;
1674 ret = read;
1675
1676 out:
1677 return ret;
1678 }
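/*
 * Sketch of the typical calling pattern (see trace_pid_write() above for
 * a complete, working example); SIZE and the buffer handling step are
 * placeholders:
 *
 *	if (trace_parser_get_init(&parser, SIZE))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		read += ret;
 *		ubuf += ret;
 *		cnt -= ret;
 *		(act on parser.buffer)
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */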
1679
1680 /* TODO add a seq_buf_to_buffer() */
1681 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1682 {
1683 int len;
1684
1685 if (trace_seq_used(s) <= s->seq.readpos)
1686 return -EBUSY;
1687
1688 len = trace_seq_used(s) - s->seq.readpos;
1689 if (cnt > len)
1690 cnt = len;
1691 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1692
1693 s->seq.readpos += cnt;
1694 return cnt;
1695 }
1696
1697 unsigned long __read_mostly tracing_thresh;
1698 static const struct file_operations tracing_max_lat_fops;
1699
1700 #ifdef LATENCY_FS_NOTIFY
1701
1702 static struct workqueue_struct *fsnotify_wq;
1703
1704 static void latency_fsnotify_workfn(struct work_struct *work)
1705 {
1706 struct trace_array *tr = container_of(work, struct trace_array,
1707 fsnotify_work);
1708 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1709 }
1710
1711 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1712 {
1713 struct trace_array *tr = container_of(iwork, struct trace_array,
1714 fsnotify_irqwork);
1715 queue_work(fsnotify_wq, &tr->fsnotify_work);
1716 }
1717
1718 static void trace_create_maxlat_file(struct trace_array *tr,
1719 struct dentry *d_tracer)
1720 {
1721 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1722 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1723 tr->d_max_latency = trace_create_file("tracing_max_latency",
1724 TRACE_MODE_WRITE,
1725 d_tracer, &tr->max_latency,
1726 &tracing_max_lat_fops);
1727 }
1728
1729 __init static int latency_fsnotify_init(void)
1730 {
1731 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1732 WQ_UNBOUND | WQ_HIGHPRI, 0);
1733 if (!fsnotify_wq) {
1734 pr_err("Unable to allocate tr_max_lat_wq\n");
1735 return -ENOMEM;
1736 }
1737 return 0;
1738 }
1739
1740 late_initcall_sync(latency_fsnotify_init);
1741
1742 void latency_fsnotify(struct trace_array *tr)
1743 {
1744 if (!fsnotify_wq)
1745 return;
1746 /*
1747 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1748 * possible that we are called from __schedule() or do_idle(), which
1749 * could cause a deadlock.
1750 */
1751 irq_work_queue(&tr->fsnotify_irqwork);
1752 }
1753
1754 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
1755 || defined(CONFIG_OSNOISE_TRACER)
1756
1757 #define trace_create_maxlat_file(tr, d_tracer) \
1758 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1759 d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1760
1761 #else
1762 #define trace_create_maxlat_file(tr, d_tracer) do { } while (0)
1763 #endif
1764
1765 #ifdef CONFIG_TRACER_MAX_TRACE
1766 /*
1767 * Copy the new maximum trace into the separate maximum-trace
1768 * structure. (this way the maximum trace is permanently saved,
1769 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1770 */
1771 static void
1772 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1773 {
1774 struct array_buffer *trace_buf = &tr->array_buffer;
1775 struct array_buffer *max_buf = &tr->max_buffer;
1776 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1777 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1778
1779 max_buf->cpu = cpu;
1780 max_buf->time_start = data->preempt_timestamp;
1781
1782 max_data->saved_latency = tr->max_latency;
1783 max_data->critical_start = data->critical_start;
1784 max_data->critical_end = data->critical_end;
1785
1786 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1787 max_data->pid = tsk->pid;
1788 /*
1789 * If tsk == current, then use current_uid(), as that does not use
1790 * RCU. The irq tracer can be called out of RCU scope.
1791 */
1792 if (tsk == current)
1793 max_data->uid = current_uid();
1794 else
1795 max_data->uid = task_uid(tsk);
1796
1797 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1798 max_data->policy = tsk->policy;
1799 max_data->rt_priority = tsk->rt_priority;
1800
1801 /* record this task's comm */
1802 tracing_record_cmdline(tsk);
1803 latency_fsnotify(tr);
1804 }
1805
1806 /**
1807 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1808 * @tr: tracer
1809 * @tsk: the task with the latency
1810 * @cpu: The cpu that initiated the trace.
1811 * @cond_data: User data associated with a conditional snapshot
1812 *
1813 * Flip the buffers between the @tr and the max_tr and record information
1814 * about which task was the cause of this latency.
1815 */
1816 void
1817 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1818 void *cond_data)
1819 {
1820 if (tr->stop_count)
1821 return;
1822
1823 WARN_ON_ONCE(!irqs_disabled());
1824
1825 if (!tr->allocated_snapshot) {
1826 /* Only the nop tracer should hit this when disabling */
1827 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1828 return;
1829 }
1830
1831 arch_spin_lock(&tr->max_lock);
1832
1833 /* Inherit the recordable setting from array_buffer */
1834 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1835 ring_buffer_record_on(tr->max_buffer.buffer);
1836 else
1837 ring_buffer_record_off(tr->max_buffer.buffer);
1838
1839 #ifdef CONFIG_TRACER_SNAPSHOT
1840 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1841 goto out_unlock;
1842 #endif
1843 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1844
1845 __update_max_tr(tr, tsk, cpu);
1846
1847 out_unlock:
1848 arch_spin_unlock(&tr->max_lock);
1849 }
1850
1851 /**
1852 * update_max_tr_single - only copy one trace over, and reset the rest
1853 * @tr: tracer
1854 * @tsk: task with the latency
1855 * @cpu: the cpu of the buffer to copy.
1856 *
1857 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1858 */
1859 void
1860 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1861 {
1862 int ret;
1863
1864 if (tr->stop_count)
1865 return;
1866
1867 WARN_ON_ONCE(!irqs_disabled());
1868 if (!tr->allocated_snapshot) {
1869 /* Only the nop tracer should hit this when disabling */
1870 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1871 return;
1872 }
1873
1874 arch_spin_lock(&tr->max_lock);
1875
1876 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1877
1878 if (ret == -EBUSY) {
1879 /*
1880 * We failed to swap the buffer due to a commit taking
1881 * place on this CPU. We fail to record, but we reset
1882 * the max trace buffer (no one writes directly to it)
1883 * and flag that it failed.
1884 */
1885 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1886 "Failed to swap buffers due to commit in progress\n");
1887 }
1888
1889 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1890
1891 __update_max_tr(tr, tsk, cpu);
1892 arch_spin_unlock(&tr->max_lock);
1893 }
1894 #endif /* CONFIG_TRACER_MAX_TRACE */
1895
1896 static int wait_on_pipe(struct trace_iterator *iter, int full)
1897 {
1898 /* Iterators are static, they should be filled or empty */
1899 if (trace_buffer_iter(iter, iter->cpu_file))
1900 return 0;
1901
1902 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1903 full);
1904 }
1905
1906 #ifdef CONFIG_FTRACE_STARTUP_TEST
1907 static bool selftests_can_run;
1908
1909 struct trace_selftests {
1910 struct list_head list;
1911 struct tracer *type;
1912 };
1913
1914 static LIST_HEAD(postponed_selftests);
1915
1916 static int save_selftest(struct tracer *type)
1917 {
1918 struct trace_selftests *selftest;
1919
1920 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1921 if (!selftest)
1922 return -ENOMEM;
1923
1924 selftest->type = type;
1925 list_add(&selftest->list, &postponed_selftests);
1926 return 0;
1927 }
1928
1929 static int run_tracer_selftest(struct tracer *type)
1930 {
1931 struct trace_array *tr = &global_trace;
1932 struct tracer *saved_tracer = tr->current_trace;
1933 int ret;
1934
1935 if (!type->selftest || tracing_selftest_disabled)
1936 return 0;
1937
1938 /*
1939 * If a tracer registers early in boot up (before scheduling is
1940 * initialized and such), then do not run its selftests yet.
1941 * Instead, run them a little later in the boot process.
1942 */
1943 if (!selftests_can_run)
1944 return save_selftest(type);
1945
1946 if (!tracing_is_on()) {
1947 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1948 type->name);
1949 return 0;
1950 }
1951
1952 /*
1953 * Run a selftest on this tracer.
1954 * Here we reset the trace buffer, and set the current
1955 * tracer to be this tracer. The tracer can then run some
1956 * internal tracing to verify that everything is in order.
1957 * If we fail, we do not register this tracer.
1958 */
1959 tracing_reset_online_cpus(&tr->array_buffer);
1960
1961 tr->current_trace = type;
1962
1963 #ifdef CONFIG_TRACER_MAX_TRACE
1964 if (type->use_max_tr) {
1965 /* If we expanded the buffers, make sure the max is expanded too */
1966 if (ring_buffer_expanded)
1967 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1968 RING_BUFFER_ALL_CPUS);
1969 tr->allocated_snapshot = true;
1970 }
1971 #endif
1972
1973 /* the test is responsible for initializing and enabling */
1974 pr_info("Testing tracer %s: ", type->name);
1975 ret = type->selftest(type, tr);
1976 /* the test is responsible for resetting too */
1977 tr->current_trace = saved_tracer;
1978 if (ret) {
1979 printk(KERN_CONT "FAILED!\n");
1980 /* Add the warning after printing 'FAILED' */
1981 WARN_ON(1);
1982 return -1;
1983 }
1984 /* Only reset on passing, to avoid touching corrupted buffers */
1985 tracing_reset_online_cpus(&tr->array_buffer);
1986
1987 #ifdef CONFIG_TRACER_MAX_TRACE
1988 if (type->use_max_tr) {
1989 tr->allocated_snapshot = false;
1990
1991 /* Shrink the max buffer again */
1992 if (ring_buffer_expanded)
1993 ring_buffer_resize(tr->max_buffer.buffer, 1,
1994 RING_BUFFER_ALL_CPUS);
1995 }
1996 #endif
1997
1998 printk(KERN_CONT "PASSED\n");
1999 return 0;
2000 }
2001
2002 static __init int init_trace_selftests(void)
2003 {
2004 struct trace_selftests *p, *n;
2005 struct tracer *t, **last;
2006 int ret;
2007
2008 selftests_can_run = true;
2009
2010 mutex_lock(&trace_types_lock);
2011
2012 if (list_empty(&postponed_selftests))
2013 goto out;
2014
2015 pr_info("Running postponed tracer tests:\n");
2016
2017 tracing_selftest_running = true;
2018 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2019 /* This loop can take minutes when sanitizers are enabled, so
2020 * let's make sure we allow RCU processing.
2021 */
2022 cond_resched();
2023 ret = run_tracer_selftest(p->type);
2024 /* If the test fails, then warn and remove from available_tracers */
2025 if (ret < 0) {
2026 WARN(1, "tracer: %s failed selftest, disabling\n",
2027 p->type->name);
2028 last = &trace_types;
2029 for (t = trace_types; t; t = t->next) {
2030 if (t == p->type) {
2031 *last = t->next;
2032 break;
2033 }
2034 last = &t->next;
2035 }
2036 }
2037 list_del(&p->list);
2038 kfree(p);
2039 }
2040 tracing_selftest_running = false;
2041
2042 out:
2043 mutex_unlock(&trace_types_lock);
2044
2045 return 0;
2046 }
2047 core_initcall(init_trace_selftests);
2048 #else
2049 static inline int run_tracer_selftest(struct tracer *type)
2050 {
2051 return 0;
2052 }
2053 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2054
2055 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2056
2057 static void __init apply_trace_boot_options(void);
2058
2059 /**
2060 * register_tracer - register a tracer with the ftrace system.
2061 * @type: the plugin for the tracer
2062 *
2063 * Register a new plugin tracer.
2064 */
2065 int __init register_tracer(struct tracer *type)
2066 {
2067 struct tracer *t;
2068 int ret = 0;
2069
2070 if (!type->name) {
2071 pr_info("Tracer must have a name\n");
2072 return -1;
2073 }
2074
2075 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2076 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2077 return -1;
2078 }
2079
2080 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2081 pr_warn("Can not register tracer %s due to lockdown\n",
2082 type->name);
2083 return -EPERM;
2084 }
2085
2086 mutex_lock(&trace_types_lock);
2087
2088 tracing_selftest_running = true;
2089
2090 for (t = trace_types; t; t = t->next) {
2091 if (strcmp(type->name, t->name) == 0) {
2092 /* already found */
2093 pr_info("Tracer %s already registered\n",
2094 type->name);
2095 ret = -1;
2096 goto out;
2097 }
2098 }
2099
2100 if (!type->set_flag)
2101 type->set_flag = &dummy_set_flag;
2102 if (!type->flags) {
2103 /* allocate a dummy tracer_flags */
2104 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2105 if (!type->flags) {
2106 ret = -ENOMEM;
2107 goto out;
2108 }
2109 type->flags->val = 0;
2110 type->flags->opts = dummy_tracer_opt;
2111 } else
2112 if (!type->flags->opts)
2113 type->flags->opts = dummy_tracer_opt;
2114
2115 /* store the tracer for __set_tracer_option */
2116 type->flags->trace = type;
2117
2118 ret = run_tracer_selftest(type);
2119 if (ret < 0)
2120 goto out;
2121
2122 type->next = trace_types;
2123 trace_types = type;
2124 add_tracer_options(&global_trace, type);
2125
2126 out:
2127 tracing_selftest_running = false;
2128 mutex_unlock(&trace_types_lock);
2129
2130 if (ret || !default_bootup_tracer)
2131 goto out_unlock;
2132
2133 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2134 goto out_unlock;
2135
2136 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2137 /* Do we want this tracer to start on bootup? */
2138 tracing_set_tracer(&global_trace, type->name);
2139 default_bootup_tracer = NULL;
2140
2141 apply_trace_boot_options();
2142
2143 /* disable other selftests, since this will break them. */
2144 disable_tracing_selftest("running a tracer");
2145
2146 out_unlock:
2147 return ret;
2148 }
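/*
 * Illustrative sketch, kept under #if 0: roughly what a minimal tracer
 * plugin accepted by register_tracer() might look like.  The names
 * example_trace_init(), example_trace_reset() and example_tracer are
 * hypothetical; real tracers such as the nop tracer follow this shape and
 * register themselves from an __init function.
 */
#if 0
static int example_trace_init(struct trace_array *tr)
{
	/* arm whatever callbacks this tracer needs */
	return 0;
}

static void example_trace_reset(struct trace_array *tr)
{
	/* undo what example_trace_init() set up */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_trace_init,
	.reset	= example_trace_reset,
};

static __init int register_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(register_example_tracer);
#endif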
2149
2150 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2151 {
2152 struct trace_buffer *buffer = buf->buffer;
2153
2154 if (!buffer)
2155 return;
2156
2157 ring_buffer_record_disable(buffer);
2158
2159 /* Make sure all commits have finished */
2160 synchronize_rcu();
2161 ring_buffer_reset_cpu(buffer, cpu);
2162
2163 ring_buffer_record_enable(buffer);
2164 }
2165
2166 void tracing_reset_online_cpus(struct array_buffer *buf)
2167 {
2168 struct trace_buffer *buffer = buf->buffer;
2169
2170 if (!buffer)
2171 return;
2172
2173 ring_buffer_record_disable(buffer);
2174
2175 /* Make sure all commits have finished */
2176 synchronize_rcu();
2177
2178 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2179
2180 ring_buffer_reset_online_cpus(buffer);
2181
2182 ring_buffer_record_enable(buffer);
2183 }
2184
2185 /* Must have trace_types_lock held */
2186 void tracing_reset_all_online_cpus(void)
2187 {
2188 struct trace_array *tr;
2189
2190 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2191 if (!tr->clear_trace)
2192 continue;
2193 tr->clear_trace = false;
2194 tracing_reset_online_cpus(&tr->array_buffer);
2195 #ifdef CONFIG_TRACER_MAX_TRACE
2196 tracing_reset_online_cpus(&tr->max_buffer);
2197 #endif
2198 }
2199 }
2200
2201 /*
2202 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2203 * is the tgid last observed corresponding to pid=i.
2204 */
2205 static int *tgid_map;
2206
2207 /* The maximum valid index into tgid_map. */
2208 static size_t tgid_map_max;
2209
2210 #define SAVED_CMDLINES_DEFAULT 128
2211 #define NO_CMDLINE_MAP UINT_MAX
2212 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2213 struct saved_cmdlines_buffer {
2214 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2215 unsigned *map_cmdline_to_pid;
2216 unsigned cmdline_num;
2217 int cmdline_idx;
2218 char *saved_cmdlines;
2219 };
2220 static struct saved_cmdlines_buffer *savedcmd;
2221
2222 static inline char *get_saved_cmdlines(int idx)
2223 {
2224 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2225 }
2226
2227 static inline void set_cmdline(int idx, const char *cmdline)
2228 {
2229 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2230 }
2231
2232 static int allocate_cmdlines_buffer(unsigned int val,
2233 struct saved_cmdlines_buffer *s)
2234 {
2235 s->map_cmdline_to_pid = kmalloc_array(val,
2236 sizeof(*s->map_cmdline_to_pid),
2237 GFP_KERNEL);
2238 if (!s->map_cmdline_to_pid)
2239 return -ENOMEM;
2240
2241 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2242 if (!s->saved_cmdlines) {
2243 kfree(s->map_cmdline_to_pid);
2244 return -ENOMEM;
2245 }
2246
2247 s->cmdline_idx = 0;
2248 s->cmdline_num = val;
2249 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2250 sizeof(s->map_pid_to_cmdline));
2251 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2252 val * sizeof(*s->map_cmdline_to_pid));
2253
2254 return 0;
2255 }
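/*
 * Illustrative sketch of how the cmdline cache above fits together for a
 * hypothetical pid 1234 whose comm is "bash" (all values made up):
 *
 *	tpid = 1234 & (PID_MAX_DEFAULT - 1);
 *	idx  = savedcmd->map_pid_to_cmdline[tpid];	// slot in the cache
 *	savedcmd->map_cmdline_to_pid[idx] == 1234	// reverse mapping
 *	savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]	// holds "bash"
 *
 * The reverse map is what lets the lookup side detect that a slot has
 * since been reused by a different pid that hashed to the same tpid.
 */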
2256
2257 static int trace_create_savedcmd(void)
2258 {
2259 int ret;
2260
2261 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2262 if (!savedcmd)
2263 return -ENOMEM;
2264
2265 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2266 if (ret < 0) {
2267 kfree(savedcmd);
2268 savedcmd = NULL;
2269 return -ENOMEM;
2270 }
2271
2272 return 0;
2273 }
2274
2275 int is_tracing_stopped(void)
2276 {
2277 return global_trace.stop_count;
2278 }
2279
2280 /**
2281 * tracing_start - quick start of the tracer
2282 *
2283 * If tracing is enabled but was stopped by tracing_stop,
2284 * this will start the tracer back up.
2285 */
2286 void tracing_start(void)
2287 {
2288 struct trace_buffer *buffer;
2289 unsigned long flags;
2290
2291 if (tracing_disabled)
2292 return;
2293
2294 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2295 if (--global_trace.stop_count) {
2296 if (global_trace.stop_count < 0) {
2297 /* Someone screwed up their debugging */
2298 WARN_ON_ONCE(1);
2299 global_trace.stop_count = 0;
2300 }
2301 goto out;
2302 }
2303
2304 /* Prevent the buffers from switching */
2305 arch_spin_lock(&global_trace.max_lock);
2306
2307 buffer = global_trace.array_buffer.buffer;
2308 if (buffer)
2309 ring_buffer_record_enable(buffer);
2310
2311 #ifdef CONFIG_TRACER_MAX_TRACE
2312 buffer = global_trace.max_buffer.buffer;
2313 if (buffer)
2314 ring_buffer_record_enable(buffer);
2315 #endif
2316
2317 arch_spin_unlock(&global_trace.max_lock);
2318
2319 out:
2320 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2321 }
2322
2323 static void tracing_start_tr(struct trace_array *tr)
2324 {
2325 struct trace_buffer *buffer;
2326 unsigned long flags;
2327
2328 if (tracing_disabled)
2329 return;
2330
2331 /* If global, we need to also start the max tracer */
2332 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2333 return tracing_start();
2334
2335 raw_spin_lock_irqsave(&tr->start_lock, flags);
2336
2337 if (--tr->stop_count) {
2338 if (tr->stop_count < 0) {
2339 /* Someone screwed up their debugging */
2340 WARN_ON_ONCE(1);
2341 tr->stop_count = 0;
2342 }
2343 goto out;
2344 }
2345
2346 buffer = tr->array_buffer.buffer;
2347 if (buffer)
2348 ring_buffer_record_enable(buffer);
2349
2350 out:
2351 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2352 }
2353
2354 /**
2355 * tracing_stop - quick stop of the tracer
2356 *
2357 * Light weight way to stop tracing. Use in conjunction with
2358 * tracing_start.
2359 */
2360 void tracing_stop(void)
2361 {
2362 struct trace_buffer *buffer;
2363 unsigned long flags;
2364
2365 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2366 if (global_trace.stop_count++)
2367 goto out;
2368
2369 /* Prevent the buffers from switching */
2370 arch_spin_lock(&global_trace.max_lock);
2371
2372 buffer = global_trace.array_buffer.buffer;
2373 if (buffer)
2374 ring_buffer_record_disable(buffer);
2375
2376 #ifdef CONFIG_TRACER_MAX_TRACE
2377 buffer = global_trace.max_buffer.buffer;
2378 if (buffer)
2379 ring_buffer_record_disable(buffer);
2380 #endif
2381
2382 arch_spin_unlock(&global_trace.max_lock);
2383
2384 out:
2385 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2386 }
2387
2388 static void tracing_stop_tr(struct trace_array *tr)
2389 {
2390 struct trace_buffer *buffer;
2391 unsigned long flags;
2392
2393 /* If global, we need to also stop the max tracer */
2394 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2395 return tracing_stop();
2396
2397 raw_spin_lock_irqsave(&tr->start_lock, flags);
2398 if (tr->stop_count++)
2399 goto out;
2400
2401 buffer = tr->array_buffer.buffer;
2402 if (buffer)
2403 ring_buffer_record_disable(buffer);
2404
2405 out:
2406 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2407 }
2408
2409 static int trace_save_cmdline(struct task_struct *tsk)
2410 {
2411 unsigned tpid, idx;
2412
2413 /* treat recording of idle task as a success */
2414 if (!tsk->pid)
2415 return 1;
2416
2417 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2418
2419 /*
2420 * It's not the end of the world if we don't get
2421 * the lock, but we also don't want to spin
2422 * nor do we want to disable interrupts,
2423 * so if we miss here, then better luck next time.
2424 */
2425 if (!arch_spin_trylock(&trace_cmdline_lock))
2426 return 0;
2427
2428 idx = savedcmd->map_pid_to_cmdline[tpid];
2429 if (idx == NO_CMDLINE_MAP) {
2430 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2431
2432 savedcmd->map_pid_to_cmdline[tpid] = idx;
2433 savedcmd->cmdline_idx = idx;
2434 }
2435
2436 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2437 set_cmdline(idx, tsk->comm);
2438
2439 arch_spin_unlock(&trace_cmdline_lock);
2440
2441 return 1;
2442 }
2443
2444 static void __trace_find_cmdline(int pid, char comm[])
2445 {
2446 unsigned map;
2447 int tpid;
2448
2449 if (!pid) {
2450 strcpy(comm, "<idle>");
2451 return;
2452 }
2453
2454 if (WARN_ON_ONCE(pid < 0)) {
2455 strcpy(comm, "<XXX>");
2456 return;
2457 }
2458
2459 tpid = pid & (PID_MAX_DEFAULT - 1);
2460 map = savedcmd->map_pid_to_cmdline[tpid];
2461 if (map != NO_CMDLINE_MAP) {
2462 tpid = savedcmd->map_cmdline_to_pid[map];
2463 if (tpid == pid) {
2464 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2465 return;
2466 }
2467 }
2468 strcpy(comm, "<...>");
2469 }
2470
2471 void trace_find_cmdline(int pid, char comm[])
2472 {
2473 preempt_disable();
2474 arch_spin_lock(&trace_cmdline_lock);
2475
2476 __trace_find_cmdline(pid, comm);
2477
2478 arch_spin_unlock(&trace_cmdline_lock);
2479 preempt_enable();
2480 }
2481
2482 static int *trace_find_tgid_ptr(int pid)
2483 {
2484 /*
2485 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2486 * if we observe a non-NULL tgid_map then we also observe the correct
2487 * tgid_map_max.
2488 */
2489 int *map = smp_load_acquire(&tgid_map);
2490
2491 if (unlikely(!map || pid > tgid_map_max))
2492 return NULL;
2493
2494 return &map[pid];
2495 }
2496
2497 int trace_find_tgid(int pid)
2498 {
2499 int *ptr = trace_find_tgid_ptr(pid);
2500
2501 return ptr ? *ptr : 0;
2502 }
2503
2504 static int trace_save_tgid(struct task_struct *tsk)
2505 {
2506 int *ptr;
2507
2508 /* treat recording of idle task as a success */
2509 if (!tsk->pid)
2510 return 1;
2511
2512 ptr = trace_find_tgid_ptr(tsk->pid);
2513 if (!ptr)
2514 return 0;
2515
2516 *ptr = tsk->tgid;
2517 return 1;
2518 }
2519
2520 static bool tracing_record_taskinfo_skip(int flags)
2521 {
2522 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2523 return true;
2524 if (!__this_cpu_read(trace_taskinfo_save))
2525 return true;
2526 return false;
2527 }
2528
2529 /**
2530 * tracing_record_taskinfo - record the task info of a task
2531 *
2532 * @task: task to record
2533 * @flags: TRACE_RECORD_CMDLINE for recording comm
2534 * TRACE_RECORD_TGID for recording tgid
2535 */
2536 void tracing_record_taskinfo(struct task_struct *task, int flags)
2537 {
2538 bool done;
2539
2540 if (tracing_record_taskinfo_skip(flags))
2541 return;
2542
2543 /*
2544 * Record as much task information as possible. If some fail, continue
2545 * to try to record the others.
2546 */
2547 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2548 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2549
2550 /* If recording any information failed, retry again soon. */
2551 if (!done)
2552 return;
2553
2554 __this_cpu_write(trace_taskinfo_save, false);
2555 }
2556
2557 /**
2558 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2559 *
2560 * @prev: previous task during sched_switch
2561 * @next: next task during sched_switch
2562 * @flags: TRACE_RECORD_CMDLINE for recording comm
2563 * TRACE_RECORD_TGID for recording tgid
2564 */
2565 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2566 struct task_struct *next, int flags)
2567 {
2568 bool done;
2569
2570 if (tracing_record_taskinfo_skip(flags))
2571 return;
2572
2573 /*
2574 * Record as much task information as possible. If some fail, continue
2575 * to try to record the others.
2576 */
2577 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2578 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2579 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2580 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2581
2582 /* If recording any information failed, retry again soon. */
2583 if (!done)
2584 return;
2585
2586 __this_cpu_write(trace_taskinfo_save, false);
2587 }
2588
2589 /* Helpers to record specific task information */
2590 void tracing_record_cmdline(struct task_struct *task)
2591 {
2592 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2593 }
2594
2595 void tracing_record_tgid(struct task_struct *task)
2596 {
2597 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2598 }
2599
2600 /*
2601 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2602 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2603 * simplifies those functions and keeps them in sync.
2604 */
2605 enum print_line_t trace_handle_return(struct trace_seq *s)
2606 {
2607 return trace_seq_has_overflowed(s) ?
2608 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2609 }
2610 EXPORT_SYMBOL_GPL(trace_handle_return);
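/*
 * Illustrative sketch (under #if 0): the usual way an event's output
 * callback uses trace_handle_return().  The event type and the printed
 * fields are hypothetical.
 */
#if 0
static enum print_line_t
example_event_print(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example value=%d\n", 42);

	/* Collapse the overflow check into a single return value */
	return trace_handle_return(s);
}
#endif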
2611
2612 static unsigned short migration_disable_value(void)
2613 {
2614 #if defined(CONFIG_SMP)
2615 return current->migration_disabled;
2616 #else
2617 return 0;
2618 #endif
2619 }
2620
2621 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2622 {
2623 unsigned int trace_flags = irqs_status;
2624 unsigned int pc;
2625
2626 pc = preempt_count();
2627
2628 if (pc & NMI_MASK)
2629 trace_flags |= TRACE_FLAG_NMI;
2630 if (pc & HARDIRQ_MASK)
2631 trace_flags |= TRACE_FLAG_HARDIRQ;
2632 if (in_serving_softirq())
2633 trace_flags |= TRACE_FLAG_SOFTIRQ;
2634
2635 if (tif_need_resched())
2636 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2637 if (test_preempt_need_resched())
2638 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2639 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2640 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2641 }
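/*
 * Illustrative sketch (under #if 0) of how the value packed above can be
 * unpacked; the helper name example_unpack_ctx() is made up.  Bits 0-3
 * carry the (capped) preemption depth, bits 4-7 the (capped)
 * migration-disable depth, and the TRACE_FLAG_* bits sit at bit 16 and up.
 */
#if 0
static void example_unpack_ctx(unsigned int trace_ctx)
{
	unsigned int preempt_depth = trace_ctx & 0xf;
	unsigned int migrate_depth = (trace_ctx >> 4) & 0xf;
	unsigned int flags = trace_ctx >> 16;

	pr_debug("preempt=%u migrate=%u hardirq=%d\n",
		 preempt_depth, migrate_depth,
		 !!(flags & TRACE_FLAG_HARDIRQ));
}
#endif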
2642
2643 struct ring_buffer_event *
2644 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2645 int type,
2646 unsigned long len,
2647 unsigned int trace_ctx)
2648 {
2649 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2650 }
2651
2652 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2653 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2654 static int trace_buffered_event_ref;
2655
2656 /**
2657 * trace_buffered_event_enable - enable buffering events
2658 *
2659 * When events are being filtered, it is quicker to use a temporary
2660 * buffer to write the event data into if there's a likely chance
2661 * that it will not be committed. Discarding an event from the
2662 * ring buffer is not as fast as committing it, and is much slower
2663 * than copying the data and then committing.
2664 *
2665 * When an event is to be filtered, allocate per cpu buffers to
2666 * write the event data into, and if the event is filtered and discarded
2667 * it is simply dropped, otherwise, the entire data is to be committed
2668 * in one shot.
2669 */
2670 void trace_buffered_event_enable(void)
2671 {
2672 struct ring_buffer_event *event;
2673 struct page *page;
2674 int cpu;
2675
2676 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2677
2678 if (trace_buffered_event_ref++)
2679 return;
2680
2681 for_each_tracing_cpu(cpu) {
2682 page = alloc_pages_node(cpu_to_node(cpu),
2683 GFP_KERNEL | __GFP_NORETRY, 0);
2684 if (!page)
2685 goto failed;
2686
2687 event = page_address(page);
2688 memset(event, 0, sizeof(*event));
2689
2690 per_cpu(trace_buffered_event, cpu) = event;
2691
2692 preempt_disable();
2693 if (cpu == smp_processor_id() &&
2694 __this_cpu_read(trace_buffered_event) !=
2695 per_cpu(trace_buffered_event, cpu))
2696 WARN_ON_ONCE(1);
2697 preempt_enable();
2698 }
2699
2700 return;
2701 failed:
2702 trace_buffered_event_disable();
2703 }
2704
2705 static void enable_trace_buffered_event(void *data)
2706 {
2707 /* Probably not needed, but do it anyway */
2708 smp_rmb();
2709 this_cpu_dec(trace_buffered_event_cnt);
2710 }
2711
2712 static void disable_trace_buffered_event(void *data)
2713 {
2714 this_cpu_inc(trace_buffered_event_cnt);
2715 }
2716
2717 /**
2718 * trace_buffered_event_disable - disable buffering events
2719 *
2720 * When a filter is removed, it is faster to not use the buffered
2721 * events, and to commit directly into the ring buffer. Free up
2722 * the temp buffers when there are no more users. This requires
2723 * special synchronization with current events.
2724 */
2725 void trace_buffered_event_disable(void)
2726 {
2727 int cpu;
2728
2729 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2730
2731 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2732 return;
2733
2734 if (--trace_buffered_event_ref)
2735 return;
2736
2737 preempt_disable();
2738 /* For each CPU, set the buffer as used. */
2739 smp_call_function_many(tracing_buffer_mask,
2740 disable_trace_buffered_event, NULL, 1);
2741 preempt_enable();
2742
2743 /* Wait for all current users to finish */
2744 synchronize_rcu();
2745
2746 for_each_tracing_cpu(cpu) {
2747 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2748 per_cpu(trace_buffered_event, cpu) = NULL;
2749 }
2750 /*
2751 * Make sure trace_buffered_event is NULL before clearing
2752 * trace_buffered_event_cnt.
2753 */
2754 smp_wmb();
2755
2756 preempt_disable();
2757 /* Do the work on each cpu */
2758 smp_call_function_many(tracing_buffer_mask,
2759 enable_trace_buffered_event, NULL, 1);
2760 preempt_enable();
2761 }
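/*
 * Illustrative sketch (under #if 0) of how a caller that installs and
 * removes an event filter might pair the two helpers above.  The
 * functions example_install_filter() and example_remove_filter() are
 * hypothetical; the real callers live in the event filter code and hold
 * event_mutex, as the WARN_ON_ONCE() checks require.
 */
#if 0
static void example_install_filter(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... attach the filter to @file ... */
	mutex_unlock(&event_mutex);
}

static void example_remove_filter(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	/* ... detach the filter from @file ... */
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif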
2762
2763 static struct trace_buffer *temp_buffer;
2764
2765 struct ring_buffer_event *
2766 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2767 struct trace_event_file *trace_file,
2768 int type, unsigned long len,
2769 unsigned int trace_ctx)
2770 {
2771 struct ring_buffer_event *entry;
2772 struct trace_array *tr = trace_file->tr;
2773 int val;
2774
2775 *current_rb = tr->array_buffer.buffer;
2776
2777 if (!tr->no_filter_buffering_ref &&
2778 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2779 (entry = this_cpu_read(trace_buffered_event))) {
2780 /*
2781 * Filtering is on, so try to use the per cpu buffer first.
2782 * This buffer will simulate a ring_buffer_event,
2783 * where the type_len is zero and the array[0] will
2784 * hold the full length.
2785 * (see include/linux/ring_buffer.h for details on
2786 * how the ring_buffer_event is structured).
2787 *
2788 * Using a temp buffer during filtering and copying it
2789 * on a matched filter is quicker than writing directly
2790 * into the ring buffer and then discarding it when
2791 * it doesn't match. That is because the discard
2792 * requires several atomic operations to get right.
2793 * Copying on match and doing nothing on a failed match
2794 * is still quicker than skipping the copy on match but
2795 * having to discard out of the ring buffer on a failed match.
2796 */
2797 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2798
2799 val = this_cpu_inc_return(trace_buffered_event_cnt);
2800
2801 /*
2802 * Preemption is disabled, but interrupts and NMIs
2803 * can still come in now. If that happens after
2804 * the above increment, then it will have to go
2805 * back to the old method of allocating the event
2806 * on the ring buffer, and if the filter fails, it
2807 * will have to call ring_buffer_discard_commit()
2808 * to remove it.
2809 *
2810 * Need to also check the unlikely case that the
2811 * length is bigger than the temp buffer size.
2812 * If that happens, then the reserve is pretty much
2813 * guaranteed to fail, as the ring buffer currently
2814 * only allows events less than a page. But that may
2815 * change in the future, so let the ring buffer reserve
2816 * handle the failure in that case.
2817 */
2818 if (val == 1 && likely(len <= max_len)) {
2819 trace_event_setup(entry, type, trace_ctx);
2820 entry->array[0] = len;
2821 return entry;
2822 }
2823 this_cpu_dec(trace_buffered_event_cnt);
2824 }
2825
2826 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2827 trace_ctx);
2828 /*
2829 * If tracing is off, but we have triggers enabled,
2830 * we still need to look at the event data. Use the temp_buffer
2831 * to store the trace event for the trigger to use. It's recursion
2832 * safe and will not be recorded anywhere.
2833 */
2834 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2835 *current_rb = temp_buffer;
2836 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2837 trace_ctx);
2838 }
2839 return entry;
2840 }
2841 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
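/*
 * Illustrative sketch of the "simulated" ring_buffer_event that lives in
 * the per-cpu trace_buffered_event page used above (field values are an
 * example only):
 *
 *	event->type_len = 0;	// length does not fit in type_len
 *	event->array[0] = len;	// full payload length in bytes
 *	event->array[1..]	// the event payload itself
 *
 * ring_buffer_event_data() and ring_buffer_event_length() already know
 * how to handle this layout, so the filter code can treat the temp buffer
 * roughly like an event reserved in the real ring buffer.
 */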
2842
2843 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2844 static DEFINE_MUTEX(tracepoint_printk_mutex);
2845
2846 static void output_printk(struct trace_event_buffer *fbuffer)
2847 {
2848 struct trace_event_call *event_call;
2849 struct trace_event_file *file;
2850 struct trace_event *event;
2851 unsigned long flags;
2852 struct trace_iterator *iter = tracepoint_print_iter;
2853
2854 /* We should never get here if iter is NULL */
2855 if (WARN_ON_ONCE(!iter))
2856 return;
2857
2858 event_call = fbuffer->trace_file->event_call;
2859 if (!event_call || !event_call->event.funcs ||
2860 !event_call->event.funcs->trace)
2861 return;
2862
2863 file = fbuffer->trace_file;
2864 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2865 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2866 !filter_match_preds(file->filter, fbuffer->entry)))
2867 return;
2868
2869 event = &fbuffer->trace_file->event_call->event;
2870
2871 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2872 trace_seq_init(&iter->seq);
2873 iter->ent = fbuffer->entry;
2874 event_call->event.funcs->trace(iter, 0, event);
2875 trace_seq_putc(&iter->seq, 0);
2876 printk("%s", iter->seq.buffer);
2877
2878 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2879 }
2880
2881 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2882 void *buffer, size_t *lenp,
2883 loff_t *ppos)
2884 {
2885 int save_tracepoint_printk;
2886 int ret;
2887
2888 mutex_lock(&tracepoint_printk_mutex);
2889 save_tracepoint_printk = tracepoint_printk;
2890
2891 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2892
2893 /*
2894 * This will force exiting early, as tracepoint_printk
2895 * is always zero when tracepoint_print_iter is not allocated.
2896 */
2897 if (!tracepoint_print_iter)
2898 tracepoint_printk = 0;
2899
2900 if (save_tracepoint_printk == tracepoint_printk)
2901 goto out;
2902
2903 if (tracepoint_printk)
2904 static_key_enable(&tracepoint_printk_key.key);
2905 else
2906 static_key_disable(&tracepoint_printk_key.key);
2907
2908 out:
2909 mutex_unlock(&tracepoint_printk_mutex);
2910
2911 return ret;
2912 }
2913
2914 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2915 {
2916 enum event_trigger_type tt = ETT_NONE;
2917 struct trace_event_file *file = fbuffer->trace_file;
2918
2919 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2920 fbuffer->entry, &tt))
2921 goto discard;
2922
2923 if (static_key_false(&tracepoint_printk_key.key))
2924 output_printk(fbuffer);
2925
2926 if (static_branch_unlikely(&trace_event_exports_enabled))
2927 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2928
2929 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2930 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2931
2932 discard:
2933 if (tt)
2934 event_triggers_post_call(file, tt);
2935
2936 }
2937 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2938
2939 /*
2940 * Skip 3:
2941 *
2942 * trace_buffer_unlock_commit_regs()
2943 * trace_event_buffer_commit()
2944 * trace_event_raw_event_xxx()
2945 */
2946 # define STACK_SKIP 3
2947
2948 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2949 struct trace_buffer *buffer,
2950 struct ring_buffer_event *event,
2951 unsigned int trace_ctx,
2952 struct pt_regs *regs)
2953 {
2954 __buffer_unlock_commit(buffer, event);
2955
2956 /*
2957 * If regs is not set, then skip the necessary functions.
2958 * Note, we can still get here via blktrace, wakeup tracer
2959 * and mmiotrace, but that's ok if they lose a function or
2960 * two. They are not that meaningful.
2961 */
2962 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2963 ftrace_trace_userstack(tr, buffer, trace_ctx);
2964 }
2965
2966 /*
2967 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2968 */
2969 void
2970 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2971 struct ring_buffer_event *event)
2972 {
2973 __buffer_unlock_commit(buffer, event);
2974 }
2975
2976 void
2977 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2978 parent_ip, unsigned int trace_ctx)
2979 {
2980 struct trace_event_call *call = &event_function;
2981 struct trace_buffer *buffer = tr->array_buffer.buffer;
2982 struct ring_buffer_event *event;
2983 struct ftrace_entry *entry;
2984
2985 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2986 trace_ctx);
2987 if (!event)
2988 return;
2989 entry = ring_buffer_event_data(event);
2990 entry->ip = ip;
2991 entry->parent_ip = parent_ip;
2992
2993 if (!call_filter_check_discard(call, entry, buffer, event)) {
2994 if (static_branch_unlikely(&trace_function_exports_enabled))
2995 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2996 __buffer_unlock_commit(buffer, event);
2997 }
2998 }
2999
3000 #ifdef CONFIG_STACKTRACE
3001
3002 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3003 #define FTRACE_KSTACK_NESTING 4
3004
3005 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3006
3007 struct ftrace_stack {
3008 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3009 };
3010
3011
3012 struct ftrace_stacks {
3013 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3014 };
3015
3016 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3017 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3018
3019 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3020 unsigned int trace_ctx,
3021 int skip, struct pt_regs *regs)
3022 {
3023 struct trace_event_call *call = &event_kernel_stack;
3024 struct ring_buffer_event *event;
3025 unsigned int size, nr_entries;
3026 struct ftrace_stack *fstack;
3027 struct stack_entry *entry;
3028 int stackidx;
3029
3030 /*
3031 * Add one for this function and the call to save_stack_trace().
3032 * If regs is set, then these functions will not be in the way.
3033 */
3034 #ifndef CONFIG_UNWINDER_ORC
3035 if (!regs)
3036 skip++;
3037 #endif
3038
3039 preempt_disable_notrace();
3040
3041 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3042
3043 /* This should never happen. If it does, yell once and skip */
3044 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3045 goto out;
3046
3047 /*
3048 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3049 * interrupt will either see the value pre increment or post
3050 * increment. If the interrupt happens pre increment it will have
3051 * restored the counter when it returns. We just need a barrier to
3052 * keep gcc from moving things around.
3053 */
3054 barrier();
3055
3056 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3057 size = ARRAY_SIZE(fstack->calls);
3058
3059 if (regs) {
3060 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3061 size, skip);
3062 } else {
3063 nr_entries = stack_trace_save(fstack->calls, size, skip);
3064 }
3065
3066 size = nr_entries * sizeof(unsigned long);
3067 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3068 (sizeof(*entry) - sizeof(entry->caller)) + size,
3069 trace_ctx);
3070 if (!event)
3071 goto out;
3072 entry = ring_buffer_event_data(event);
3073
3074 memcpy(&entry->caller, fstack->calls, size);
3075 entry->size = nr_entries;
3076
3077 if (!call_filter_check_discard(call, entry, buffer, event))
3078 __buffer_unlock_commit(buffer, event);
3079
3080 out:
3081 /* Again, don't let gcc optimize things here */
3082 barrier();
3083 __this_cpu_dec(ftrace_stack_reserve);
3084 preempt_enable_notrace();
3085
3086 }
3087
3088 static inline void ftrace_trace_stack(struct trace_array *tr,
3089 struct trace_buffer *buffer,
3090 unsigned int trace_ctx,
3091 int skip, struct pt_regs *regs)
3092 {
3093 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3094 return;
3095
3096 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3097 }
3098
3099 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3100 int skip)
3101 {
3102 struct trace_buffer *buffer = tr->array_buffer.buffer;
3103
3104 if (rcu_is_watching()) {
3105 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3106 return;
3107 }
3108
3109 /*
3110 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3111 * but if the above rcu_is_watching() failed, then the NMI
3112 * triggered someplace critical, and rcu_irq_enter() should
3113 * not be called from NMI.
3114 */
3115 if (unlikely(in_nmi()))
3116 return;
3117
3118 rcu_irq_enter_irqson();
3119 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3120 rcu_irq_exit_irqson();
3121 }
3122
3123 /**
3124 * trace_dump_stack - record a stack back trace in the trace buffer
3125 * @skip: Number of functions to skip (helper handlers)
3126 */
3127 void trace_dump_stack(int skip)
3128 {
3129 if (tracing_disabled || tracing_selftest_running)
3130 return;
3131
3132 #ifndef CONFIG_UNWINDER_ORC
3133 /* Skip 1 to skip this function. */
3134 skip++;
3135 #endif
3136 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3137 tracing_gen_ctx(), skip, NULL);
3138 }
3139 EXPORT_SYMBOL_GPL(trace_dump_stack);
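/*
 * Illustrative sketch (under #if 0): trace_dump_stack() is typically
 * dropped into code being debugged to record how a given point was
 * reached.  The function example_debug_hook() is made up.
 */
#if 0
static void example_debug_hook(void)
{
	/* Record the current kernel stack trace into the trace buffer */
	trace_dump_stack(0);
}
#endif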
3140
3141 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3142 static DEFINE_PER_CPU(int, user_stack_count);
3143
3144 static void
3145 ftrace_trace_userstack(struct trace_array *tr,
3146 struct trace_buffer *buffer, unsigned int trace_ctx)
3147 {
3148 struct trace_event_call *call = &event_user_stack;
3149 struct ring_buffer_event *event;
3150 struct userstack_entry *entry;
3151
3152 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3153 return;
3154
3155 /*
3156 * NMIs cannot handle page faults, even with fixups.
3157 * Saving the user stack can (and often does) fault.
3158 */
3159 if (unlikely(in_nmi()))
3160 return;
3161
3162 /*
3163 * prevent recursion, since the user stack tracing may
3164 * trigger other kernel events.
3165 */
3166 preempt_disable();
3167 if (__this_cpu_read(user_stack_count))
3168 goto out;
3169
3170 __this_cpu_inc(user_stack_count);
3171
3172 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3173 sizeof(*entry), trace_ctx);
3174 if (!event)
3175 goto out_drop_count;
3176 entry = ring_buffer_event_data(event);
3177
3178 entry->tgid = current->tgid;
3179 memset(&entry->caller, 0, sizeof(entry->caller));
3180
3181 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3182 if (!call_filter_check_discard(call, entry, buffer, event))
3183 __buffer_unlock_commit(buffer, event);
3184
3185 out_drop_count:
3186 __this_cpu_dec(user_stack_count);
3187 out:
3188 preempt_enable();
3189 }
3190 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3191 static void ftrace_trace_userstack(struct trace_array *tr,
3192 struct trace_buffer *buffer,
3193 unsigned int trace_ctx)
3194 {
3195 }
3196 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3197
3198 #endif /* CONFIG_STACKTRACE */
3199
3200 static inline void
3201 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3202 unsigned long long delta)
3203 {
3204 entry->bottom_delta_ts = delta & U32_MAX;
3205 entry->top_delta_ts = (delta >> 32);
3206 }
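/*
 * Illustrative sketch: the 64-bit delta split above is reassembled on the
 * output side as
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 *
 * which recovers the full timestamp gap between the repeated calls.
 */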
3207
3208 void trace_last_func_repeats(struct trace_array *tr,
3209 struct trace_func_repeats *last_info,
3210 unsigned int trace_ctx)
3211 {
3212 struct trace_buffer *buffer = tr->array_buffer.buffer;
3213 struct func_repeats_entry *entry;
3214 struct ring_buffer_event *event;
3215 u64 delta;
3216
3217 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3218 sizeof(*entry), trace_ctx);
3219 if (!event)
3220 return;
3221
3222 delta = ring_buffer_event_time_stamp(buffer, event) -
3223 last_info->ts_last_call;
3224
3225 entry = ring_buffer_event_data(event);
3226 entry->ip = last_info->ip;
3227 entry->parent_ip = last_info->parent_ip;
3228 entry->count = last_info->count;
3229 func_repeats_set_delta_ts(entry, delta);
3230
3231 __buffer_unlock_commit(buffer, event);
3232 }
3233
3234 /* created for use with alloc_percpu */
3235 struct trace_buffer_struct {
3236 int nesting;
3237 char buffer[4][TRACE_BUF_SIZE];
3238 };
3239
3240 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3241
3242 /*
3243 * This allows for lockless recording. If we're nested too deeply, then
3244 * this returns NULL.
3245 */
3246 static char *get_trace_buf(void)
3247 {
3248 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3249
3250 if (!trace_percpu_buffer || buffer->nesting >= 4)
3251 return NULL;
3252
3253 buffer->nesting++;
3254
3255 /* Interrupts must see nesting incremented before we use the buffer */
3256 barrier();
3257 return &buffer->buffer[buffer->nesting - 1][0];
3258 }
3259
3260 static void put_trace_buf(void)
3261 {
3262 /* Don't let the decrement of nesting leak before this */
3263 barrier();
3264 this_cpu_dec(trace_percpu_buffer->nesting);
3265 }
3266
3267 static int alloc_percpu_trace_buffer(void)
3268 {
3269 struct trace_buffer_struct __percpu *buffers;
3270
3271 if (trace_percpu_buffer)
3272 return 0;
3273
3274 buffers = alloc_percpu(struct trace_buffer_struct);
3275 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3276 return -ENOMEM;
3277
3278 trace_percpu_buffer = buffers;
3279 return 0;
3280 }
3281
3282 static int buffers_allocated;
3283
3284 void trace_printk_init_buffers(void)
3285 {
3286 if (buffers_allocated)
3287 return;
3288
3289 if (alloc_percpu_trace_buffer())
3290 return;
3291
3292 /* trace_printk() is for debug use only. Don't use it in production. */
3293
3294 pr_warn("\n");
3295 pr_warn("**********************************************************\n");
3296 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3297 pr_warn("** **\n");
3298 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3299 pr_warn("** **\n");
3300 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3301 pr_warn("** unsafe for production use. **\n");
3302 pr_warn("** **\n");
3303 pr_warn("** If you see this message and you are not debugging **\n");
3304 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3305 pr_warn("** **\n");
3306 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3307 pr_warn("**********************************************************\n");
3308
3309 /* Expand the buffers to set size */
3310 tracing_update_buffers();
3311
3312 buffers_allocated = 1;
3313
3314 /*
3315 * trace_printk_init_buffers() can be called by modules.
3316 * If that happens, then we need to start cmdline recording
3317 * directly here. If the global_trace.buffer is already
3318 * allocated here, then this was called by module code.
3319 */
3320 if (global_trace.array_buffer.buffer)
3321 tracing_start_cmdline_record();
3322 }
3323 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3324
3325 void trace_printk_start_comm(void)
3326 {
3327 /* Start tracing comms if trace printk is set */
3328 if (!buffers_allocated)
3329 return;
3330 tracing_start_cmdline_record();
3331 }
3332
3333 static void trace_printk_start_stop_comm(int enabled)
3334 {
3335 if (!buffers_allocated)
3336 return;
3337
3338 if (enabled)
3339 tracing_start_cmdline_record();
3340 else
3341 tracing_stop_cmdline_record();
3342 }
3343
3344 /**
3345 * trace_vbprintk - write binary msg to tracing buffer
3346 * @ip: The address of the caller
3347 * @fmt: The string format to write to the buffer
3348 * @args: Arguments for @fmt
3349 */
3350 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3351 {
3352 struct trace_event_call *call = &event_bprint;
3353 struct ring_buffer_event *event;
3354 struct trace_buffer *buffer;
3355 struct trace_array *tr = &global_trace;
3356 struct bprint_entry *entry;
3357 unsigned int trace_ctx;
3358 char *tbuffer;
3359 int len = 0, size;
3360
3361 if (unlikely(tracing_selftest_running || tracing_disabled))
3362 return 0;
3363
3364 /* Don't pollute graph traces with trace_vprintk internals */
3365 pause_graph_tracing();
3366
3367 trace_ctx = tracing_gen_ctx();
3368 preempt_disable_notrace();
3369
3370 tbuffer = get_trace_buf();
3371 if (!tbuffer) {
3372 len = 0;
3373 goto out_nobuffer;
3374 }
3375
3376 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3377
3378 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3379 goto out_put;
3380
3381 size = sizeof(*entry) + sizeof(u32) * len;
3382 buffer = tr->array_buffer.buffer;
3383 ring_buffer_nest_start(buffer);
3384 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3385 trace_ctx);
3386 if (!event)
3387 goto out;
3388 entry = ring_buffer_event_data(event);
3389 entry->ip = ip;
3390 entry->fmt = fmt;
3391
3392 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3393 if (!call_filter_check_discard(call, entry, buffer, event)) {
3394 __buffer_unlock_commit(buffer, event);
3395 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3396 }
3397
3398 out:
3399 ring_buffer_nest_end(buffer);
3400 out_put:
3401 put_trace_buf();
3402
3403 out_nobuffer:
3404 preempt_enable_notrace();
3405 unpause_graph_tracing();
3406
3407 return len;
3408 }
3409 EXPORT_SYMBOL_GPL(trace_vbprintk);
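/*
 * Illustrative sketch (under #if 0): a debugging call like the one below
 * reaches trace_vbprintk() through the trace_printk() machinery, which
 * records only the format pointer plus the binary arguments.  The values
 * nr_bytes and name are hypothetical.
 */
#if 0
static void example_debug_print(int nr_bytes, const char *name)
{
	trace_printk("read %d bytes from %s\n", nr_bytes, name);
}
#endif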
3410
3411 __printf(3, 0)
3412 static int
3413 __trace_array_vprintk(struct trace_buffer *buffer,
3414 unsigned long ip, const char *fmt, va_list args)
3415 {
3416 struct trace_event_call *call = &event_print;
3417 struct ring_buffer_event *event;
3418 int len = 0, size;
3419 struct print_entry *entry;
3420 unsigned int trace_ctx;
3421 char *tbuffer;
3422
3423 if (tracing_disabled || tracing_selftest_running)
3424 return 0;
3425
3426 /* Don't pollute graph traces with trace_vprintk internals */
3427 pause_graph_tracing();
3428
3429 trace_ctx = tracing_gen_ctx();
3430 preempt_disable_notrace();
3431
3432
3433 tbuffer = get_trace_buf();
3434 if (!tbuffer) {
3435 len = 0;
3436 goto out_nobuffer;
3437 }
3438
3439 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3440
3441 size = sizeof(*entry) + len + 1;
3442 ring_buffer_nest_start(buffer);
3443 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3444 trace_ctx);
3445 if (!event)
3446 goto out;
3447 entry = ring_buffer_event_data(event);
3448 entry->ip = ip;
3449
3450 memcpy(&entry->buf, tbuffer, len + 1);
3451 if (!call_filter_check_discard(call, entry, buffer, event)) {
3452 __buffer_unlock_commit(buffer, event);
3453 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3454 }
3455
3456 out:
3457 ring_buffer_nest_end(buffer);
3458 put_trace_buf();
3459
3460 out_nobuffer:
3461 preempt_enable_notrace();
3462 unpause_graph_tracing();
3463
3464 return len;
3465 }
3466
3467 __printf(3, 0)
3468 int trace_array_vprintk(struct trace_array *tr,
3469 unsigned long ip, const char *fmt, va_list args)
3470 {
3471 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3472 }
3473
3474 /**
3475 * trace_array_printk - Print a message to a specific instance
3476 * @tr: The instance trace_array descriptor
3477 * @ip: The instruction pointer that this is called from.
3478 * @fmt: The format to print (printf format)
3479 *
3480 * If a subsystem sets up its own instance, it has the right to
3481 * printk strings into its tracing instance buffer using this
3482 * function. Note, this function will not write into the top level
3483 * buffer (use trace_printk() for that), as writing into the top level
3484 * buffer should only have events that can be individually disabled.
3485 * trace_printk() is only used for debugging a kernel, and should
3486 * never be incorporated into normal use.
3487 *
3488 * trace_array_printk() can be used, as it will not add noise to the
3489 * top level tracing buffer.
3490 *
3491 * Note, trace_array_init_printk() must be called on @tr before this
3492 * can be used.
3493 */
3494 __printf(3, 0)
3495 int trace_array_printk(struct trace_array *tr,
3496 unsigned long ip, const char *fmt, ...)
3497 {
3498 int ret;
3499 va_list ap;
3500
3501 if (!tr)
3502 return -ENOENT;
3503
3504 /* This is only allowed for created instances */
3505 if (tr == &global_trace)
3506 return 0;
3507
3508 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3509 return 0;
3510
3511 va_start(ap, fmt);
3512 ret = trace_array_vprintk(tr, ip, fmt, ap);
3513 va_end(ap);
3514 return ret;
3515 }
3516 EXPORT_SYMBOL_GPL(trace_array_printk);
3517
3518 /**
3519 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3520 * @tr: The trace array to initialize the buffers for
3521 *
3522 * As trace_array_printk() only writes into instances, they are OK to
3523 * have in the kernel (unlike trace_printk()). This needs to be called
3524 * before trace_array_printk() can be used on a trace_array.
3525 */
3526 int trace_array_init_printk(struct trace_array *tr)
3527 {
3528 if (!tr)
3529 return -ENOENT;
3530
3531 /* This is only allowed for created instances */
3532 if (tr == &global_trace)
3533 return -EINVAL;
3534
3535 return alloc_percpu_trace_buffer();
3536 }
3537 EXPORT_SYMBOL_GPL(trace_array_init_printk);
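/*
 * Illustrative sketch (under #if 0) of how a subsystem with its own
 * tracing instance might use the two helpers above.  The instance name
 * "example" and the function example_instance_log() are made up.
 */
#if 0
static struct trace_array *example_tr;

static int example_instance_log(int value)
{
	if (!example_tr) {
		example_tr = trace_array_get_by_name("example");
		if (!example_tr)
			return -ENOMEM;
		if (trace_array_init_printk(example_tr))
			return -ENOMEM;
	}

	return trace_array_printk(example_tr, _THIS_IP_,
				  "example value: %d\n", value);
}
#endif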
3538
3539 __printf(3, 4)
3540 int trace_array_printk_buf(struct trace_buffer *buffer,
3541 unsigned long ip, const char *fmt, ...)
3542 {
3543 int ret;
3544 va_list ap;
3545
3546 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3547 return 0;
3548
3549 va_start(ap, fmt);
3550 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3551 va_end(ap);
3552 return ret;
3553 }
3554
3555 __printf(2, 0)
3556 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3557 {
3558 return trace_array_vprintk(&global_trace, ip, fmt, args);
3559 }
3560 EXPORT_SYMBOL_GPL(trace_vprintk);
3561
3562 static void trace_iterator_increment(struct trace_iterator *iter)
3563 {
3564 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3565
3566 iter->idx++;
3567 if (buf_iter)
3568 ring_buffer_iter_advance(buf_iter);
3569 }
3570
3571 static struct trace_entry *
3572 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3573 unsigned long *lost_events)
3574 {
3575 struct ring_buffer_event *event;
3576 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3577
3578 if (buf_iter) {
3579 event = ring_buffer_iter_peek(buf_iter, ts);
3580 if (lost_events)
3581 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3582 (unsigned long)-1 : 0;
3583 } else {
3584 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3585 lost_events);
3586 }
3587
3588 if (event) {
3589 iter->ent_size = ring_buffer_event_length(event);
3590 return ring_buffer_event_data(event);
3591 }
3592 iter->ent_size = 0;
3593 return NULL;
3594 }
3595
3596 static struct trace_entry *
3597 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3598 unsigned long *missing_events, u64 *ent_ts)
3599 {
3600 struct trace_buffer *buffer = iter->array_buffer->buffer;
3601 struct trace_entry *ent, *next = NULL;
3602 unsigned long lost_events = 0, next_lost = 0;
3603 int cpu_file = iter->cpu_file;
3604 u64 next_ts = 0, ts;
3605 int next_cpu = -1;
3606 int next_size = 0;
3607 int cpu;
3608
3609 /*
3610 * If we are in a per_cpu trace file, don't bother iterating over
3611 * all CPUs; just peek at that CPU directly.
3612 */
3613 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3614 if (ring_buffer_empty_cpu(buffer, cpu_file))
3615 return NULL;
3616 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3617 if (ent_cpu)
3618 *ent_cpu = cpu_file;
3619
3620 return ent;
3621 }
3622
3623 for_each_tracing_cpu(cpu) {
3624
3625 if (ring_buffer_empty_cpu(buffer, cpu))
3626 continue;
3627
3628 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3629
3630 /*
3631 * Pick the entry with the smallest timestamp:
3632 */
3633 if (ent && (!next || ts < next_ts)) {
3634 next = ent;
3635 next_cpu = cpu;
3636 next_ts = ts;
3637 next_lost = lost_events;
3638 next_size = iter->ent_size;
3639 }
3640 }
3641
3642 iter->ent_size = next_size;
3643
3644 if (ent_cpu)
3645 *ent_cpu = next_cpu;
3646
3647 if (ent_ts)
3648 *ent_ts = next_ts;
3649
3650 if (missing_events)
3651 *missing_events = next_lost;
3652
3653 return next;
3654 }
3655
3656 #define STATIC_FMT_BUF_SIZE 128
3657 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3658
3659 static char *trace_iter_expand_format(struct trace_iterator *iter)
3660 {
3661 char *tmp;
3662
3663 /*
3664 * iter->tr is NULL when used with tp_printk, which makes
3665 * this get called where it is not safe to call krealloc().
3666 */
3667 if (!iter->tr || iter->fmt == static_fmt_buf)
3668 return NULL;
3669
3670 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3671 GFP_KERNEL);
3672 if (tmp) {
3673 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3674 iter->fmt = tmp;
3675 }
3676
3677 return tmp;
3678 }
3679
3680 /* Returns true if the string is safe to dereference from an event */
3681 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3682 bool star, int len)
3683 {
3684 unsigned long addr = (unsigned long)str;
3685 struct trace_event *trace_event;
3686 struct trace_event_call *event;
3687
3688 /* Ignore strings with no length */
3689 if (star && !len)
3690 return true;
3691
3692 /* OK if part of the event data */
3693 if ((addr >= (unsigned long)iter->ent) &&
3694 (addr < (unsigned long)iter->ent + iter->ent_size))
3695 return true;
3696
3697 /* OK if part of the temp seq buffer */
3698 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3699 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3700 return true;
3701
3702 /* Core rodata can not be freed */
3703 if (is_kernel_rodata(addr))
3704 return true;
3705
3706 if (trace_is_tracepoint_string(str))
3707 return true;
3708
3709 /*
3710 * Now this could be a module event, referencing core module
3711 * data, which is OK.
3712 */
3713 if (!iter->ent)
3714 return false;
3715
3716 trace_event = ftrace_find_event(iter->ent->type);
3717 if (!trace_event)
3718 return false;
3719
3720 event = container_of(trace_event, struct trace_event_call, event);
3721 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3722 return false;
3723
3724 /* Would rather have rodata, but this will suffice */
3725 if (within_module_core(addr, event->module))
3726 return true;
3727
3728 return false;
3729 }
3730
3731 static const char *show_buffer(struct trace_seq *s)
3732 {
3733 struct seq_buf *seq = &s->seq;
3734
3735 seq_buf_terminate(seq);
3736
3737 return seq->buffer;
3738 }
3739
3740 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3741
3742 static int test_can_verify_check(const char *fmt, ...)
3743 {
3744 char buf[16];
3745 va_list ap;
3746 int ret;
3747
3748 /*
3749 * The verifier depends on vsnprintf() modifying the va_list that is
3750 * passed to it, i.e. on the va_list being passed by reference. Some
3751 * architectures (like x86_32) pass it by value, which means that
3752 * vsnprintf() does not modify the va_list passed to it, and the
3753 * verifier would then need to be able to understand all the values
3754 * that vsnprintf can use. If it is passed by value, then the
3755 * verifier is disabled.
3756 */
3757 va_start(ap, fmt);
3758 vsnprintf(buf, 16, "%d", ap);
3759 ret = va_arg(ap, int);
3760 va_end(ap);
3761
3762 return ret;
3763 }
3764
3765 static void test_can_verify(void)
3766 {
3767 if (!test_can_verify_check("%d %d", 0, 1)) {
3768 pr_info("trace event string verifier disabled\n");
3769 static_branch_inc(&trace_no_verify);
3770 }
3771 }
3772
3773 /**
3774 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3775 * @iter: The iterator that holds the seq buffer and the event being printed
3776 * @fmt: The format used to print the event
3777 * @ap: The va_list holding the data to print from @fmt.
3778 *
3779 * This writes the data into the @iter->seq buffer using the data from
3780 * @fmt and @ap. If the format has a %s, then the source of the string
3781 * is examined to make sure it is safe to print, otherwise it will
3782 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3783 * pointer.
3784 */
3785 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3786 va_list ap)
3787 {
3788 const char *p = fmt;
3789 const char *str;
3790 int i, j;
3791
3792 if (WARN_ON_ONCE(!fmt))
3793 return;
3794
3795 if (static_branch_unlikely(&trace_no_verify))
3796 goto print;
3797
3798 /* Don't bother checking when doing a ftrace_dump() */
3799 if (iter->fmt == static_fmt_buf)
3800 goto print;
3801
3802 while (*p) {
3803 bool star = false;
3804 int len = 0;
3805
3806 j = 0;
3807
3808 /* We only care about %s and variants */
3809 for (i = 0; p[i]; i++) {
3810 if (i + 1 >= iter->fmt_size) {
3811 /*
3812 * If we can't expand the copy buffer,
3813 * just print it.
3814 */
3815 if (!trace_iter_expand_format(iter))
3816 goto print;
3817 }
3818
3819 if (p[i] == '\\' && p[i+1]) {
3820 i++;
3821 continue;
3822 }
3823 if (p[i] == '%') {
3824 /* Need to test cases like %08.*s */
3825 for (j = 1; p[i+j]; j++) {
3826 if (isdigit(p[i+j]) ||
3827 p[i+j] == '.')
3828 continue;
3829 if (p[i+j] == '*') {
3830 star = true;
3831 continue;
3832 }
3833 break;
3834 }
3835 if (p[i+j] == 's')
3836 break;
3837 star = false;
3838 }
3839 j = 0;
3840 }
3841 /* If no %s found then just print normally */
3842 if (!p[i])
3843 break;
3844
3845 /* Copy up to the %s, and print that */
3846 strncpy(iter->fmt, p, i);
3847 iter->fmt[i] = '\0';
3848 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3849
3850 /*
3851 * If iter->seq is full, the above call no longer guarantees
3852 * that ap is in sync with fmt processing, and further calls
3853 * to va_arg() can return wrong positional arguments.
3854 *
3855 * Ensure that ap is no longer used in this case.
3856 */
3857 if (iter->seq.full) {
3858 p = "";
3859 break;
3860 }
3861
3862 if (star)
3863 len = va_arg(ap, int);
3864
3865 /* The ap now points to the string data of the %s */
3866 str = va_arg(ap, const char *);
3867
3868 /*
3869 * If you hit this warning, it is likely that the
3870 * trace event in question used %s on a string that
3871 * was saved at the time of the event, but may not be
3872 * around when the trace is read. Use __string(),
3873 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3874 * instead. See samples/trace_events/trace-events-sample.h
3875 * for reference.
3876 */
3877 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3878 "fmt: '%s' current_buffer: '%s'",
3879 fmt, show_buffer(&iter->seq))) {
3880 int ret;
3881
3882 /* Try to safely read the string */
3883 if (star) {
3884 if (len + 1 > iter->fmt_size)
3885 len = iter->fmt_size - 1;
3886 if (len < 0)
3887 len = 0;
3888 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3889 iter->fmt[len] = 0;
3890 star = false;
3891 } else {
3892 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3893 iter->fmt_size);
3894 }
3895 if (ret < 0)
3896 trace_seq_printf(&iter->seq, "(0x%px)", str);
3897 else
3898 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3899 str, iter->fmt);
3900 str = "[UNSAFE-MEMORY]";
3901 strcpy(iter->fmt, "%s");
3902 } else {
3903 strncpy(iter->fmt, p + i, j + 1);
3904 iter->fmt[j+1] = '\0';
3905 }
3906 if (star)
3907 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3908 else
3909 trace_seq_printf(&iter->seq, iter->fmt, str);
3910
3911 p += i + j + 1;
3912 }
3913 print:
3914 if (*p)
3915 trace_seq_vprintf(&iter->seq, p, ap);
3916 }
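/*
 * Illustrative sketch (under #if 0) of the TRACE_EVENT() pattern that the
 * warning above points people to.  Instead of recording a bare pointer
 * and printing it later with %s, the string is copied into the event with
 * __string()/__assign_str() and printed with __get_str().  The event name
 * and arguments are hypothetical, and the definition belongs in a trace
 * header; see samples/trace_events/trace-events-sample.h for a complete
 * example.
 */
#if 0
TRACE_EVENT(example_open,
	TP_PROTO(const char *filename),
	TP_ARGS(filename),
	TP_STRUCT__entry(
		__string(filename, filename)
	),
	TP_fast_assign(
		__assign_str(filename, filename);
	),
	TP_printk("file=%s", __get_str(filename))
);
#endif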
3917
3918 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3919 {
3920 const char *p, *new_fmt;
3921 char *q;
3922
3923 if (WARN_ON_ONCE(!fmt))
3924 return fmt;
3925
3926 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3927 return fmt;
3928
3929 p = fmt;
3930 new_fmt = q = iter->fmt;
3931 while (*p) {
3932 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3933 if (!trace_iter_expand_format(iter))
3934 return fmt;
3935
3936 q += iter->fmt - new_fmt;
3937 new_fmt = iter->fmt;
3938 }
3939
3940 *q++ = *p++;
3941
3942 /* Replace %p with %px */
3943 if (p[-1] == '%') {
3944 if (p[0] == '%') {
3945 *q++ = *p++;
3946 } else if (p[0] == 'p' && !isalnum(p[1])) {
3947 *q++ = *p++;
3948 *q++ = 'x';
3949 }
3950 }
3951 }
3952 *q = '\0';
3953
3954 return new_fmt;
3955 }
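/*
 * For illustration (a sketch derived from the loop above, not a comment
 * from the original file): with TRACE_ITER_HASH_PTR cleared, a format
 * such as
 *
 *   "ptr=%p val=%d msg=%s"   becomes   "ptr=%px val=%d msg=%s"
 *
 * while "%%p" and pointer extensions like "%ps" are left untouched,
 * because of the isalnum() check on the character following 'p'.
 */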
3956
3957 #define STATIC_TEMP_BUF_SIZE 128
3958 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3959
3960 /* Find the next real entry, without updating the iterator itself */
3961 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3962 int *ent_cpu, u64 *ent_ts)
3963 {
3964 /* __find_next_entry will reset ent_size */
3965 int ent_size = iter->ent_size;
3966 struct trace_entry *entry;
3967
3968 /*
3969 * If called from ftrace_dump(), then the iter->temp buffer
3970 * will be the static_temp_buf and not created from kmalloc.
3971 * If the entry size is greater than the buffer, we can
3972 * not save it. Just return NULL in that case. This is only
3973 * used to add markers when two consecutive events' time
3974 * stamps have a large delta. See trace_print_lat_context()
3975 */
3976 if (iter->temp == static_temp_buf &&
3977 STATIC_TEMP_BUF_SIZE < ent_size)
3978 return NULL;
3979
3980 /*
3981 * __find_next_entry() may call peek_next_entry(), which may call
3982 * ring_buffer_peek(), and that can leave the contents of iter->ent
3983 * undefined. Copy iter->ent now.
3984 */
3985 if (iter->ent && iter->ent != iter->temp) {
3986 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3987 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3988 void *temp;
3989 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3990 if (!temp)
3991 return NULL;
3992 kfree(iter->temp);
3993 iter->temp = temp;
3994 iter->temp_size = iter->ent_size;
3995 }
3996 memcpy(iter->temp, iter->ent, iter->ent_size);
3997 iter->ent = iter->temp;
3998 }
3999 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4000 /* Put back the original ent_size */
4001 iter->ent_size = ent_size;
4002
4003 return entry;
4004 }
4005
4006 /* Find the next real entry, and increment the iterator to the next entry */
4007 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4008 {
4009 iter->ent = __find_next_entry(iter, &iter->cpu,
4010 &iter->lost_events, &iter->ts);
4011
4012 if (iter->ent)
4013 trace_iterator_increment(iter);
4014
4015 return iter->ent ? iter : NULL;
4016 }
4017
4018 static void trace_consume(struct trace_iterator *iter)
4019 {
4020 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4021 &iter->lost_events);
4022 }
4023
4024 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4025 {
4026 struct trace_iterator *iter = m->private;
4027 int i = (int)*pos;
4028 void *ent;
4029
4030 WARN_ON_ONCE(iter->leftover);
4031
4032 (*pos)++;
4033
4034 /* can't go backwards */
4035 if (iter->idx > i)
4036 return NULL;
4037
4038 if (iter->idx < 0)
4039 ent = trace_find_next_entry_inc(iter);
4040 else
4041 ent = iter;
4042
4043 while (ent && iter->idx < i)
4044 ent = trace_find_next_entry_inc(iter);
4045
4046 iter->pos = *pos;
4047
4048 return ent;
4049 }
4050
4051 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4052 {
4053 struct ring_buffer_iter *buf_iter;
4054 unsigned long entries = 0;
4055 u64 ts;
4056
4057 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4058
4059 buf_iter = trace_buffer_iter(iter, cpu);
4060 if (!buf_iter)
4061 return;
4062
4063 ring_buffer_iter_reset(buf_iter);
4064
4065 /*
4066 * With the max latency tracers, it is possible that a reset never
4067 * took place on a cpu. This shows up as timestamps that are before
4068 * the start of the buffer.
4069 */
4070 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4071 if (ts >= iter->array_buffer->time_start)
4072 break;
4073 entries++;
4074 ring_buffer_iter_advance(buf_iter);
4075 }
4076
4077 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4078 }
4079
4080 /*
4081 * The current tracer is copied to avoid taking a global lock
4082 * all around.
4083 */
4084 static void *s_start(struct seq_file *m, loff_t *pos)
4085 {
4086 struct trace_iterator *iter = m->private;
4087 struct trace_array *tr = iter->tr;
4088 int cpu_file = iter->cpu_file;
4089 void *p = NULL;
4090 loff_t l = 0;
4091 int cpu;
4092
4093 /*
4094 * Copy the tracer to avoid using a global lock all around.
4095 * iter->trace is a copy of current_trace; the name pointer may be
4096 * compared directly instead of using strcmp(), as iter->trace->name
4097 * will point to the same string as current_trace->name.
4098 */
4099 mutex_lock(&trace_types_lock);
4100 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4101 *iter->trace = *tr->current_trace;
4102 mutex_unlock(&trace_types_lock);
4103
4104 #ifdef CONFIG_TRACER_MAX_TRACE
4105 if (iter->snapshot && iter->trace->use_max_tr)
4106 return ERR_PTR(-EBUSY);
4107 #endif
4108
4109 if (*pos != iter->pos) {
4110 iter->ent = NULL;
4111 iter->cpu = 0;
4112 iter->idx = -1;
4113
4114 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4115 for_each_tracing_cpu(cpu)
4116 tracing_iter_reset(iter, cpu);
4117 } else
4118 tracing_iter_reset(iter, cpu_file);
4119
4120 iter->leftover = 0;
4121 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4122 ;
4123
4124 } else {
4125 /*
4126 * If we overflowed the seq_file before, then we want
4127 * to just reuse the trace_seq buffer again.
4128 */
4129 if (iter->leftover)
4130 p = iter;
4131 else {
4132 l = *pos - 1;
4133 p = s_next(m, p, &l);
4134 }
4135 }
4136
4137 trace_event_read_lock();
4138 trace_access_lock(cpu_file);
4139 return p;
4140 }
4141
4142 static void s_stop(struct seq_file *m, void *p)
4143 {
4144 struct trace_iterator *iter = m->private;
4145
4146 #ifdef CONFIG_TRACER_MAX_TRACE
4147 if (iter->snapshot && iter->trace->use_max_tr)
4148 return;
4149 #endif
4150
4151 trace_access_unlock(iter->cpu_file);
4152 trace_event_read_unlock();
4153 }
4154
4155 static void
4156 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4157 unsigned long *entries, int cpu)
4158 {
4159 unsigned long count;
4160
4161 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4162 /*
4163 * If this buffer has skipped entries, then we hold all
4164 * entries for the trace and we need to ignore the
4165 * ones before the buffer's start timestamp.
4166 */
4167 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4168 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4169 /* total is the same as the entries */
4170 *total = count;
4171 } else
4172 *total = count +
4173 ring_buffer_overrun_cpu(buf->buffer, cpu);
4174 *entries = count;
4175 }
4176
4177 static void
4178 get_total_entries(struct array_buffer *buf,
4179 unsigned long *total, unsigned long *entries)
4180 {
4181 unsigned long t, e;
4182 int cpu;
4183
4184 *total = 0;
4185 *entries = 0;
4186
4187 for_each_tracing_cpu(cpu) {
4188 get_total_entries_cpu(buf, &t, &e, cpu);
4189 *total += t;
4190 *entries += e;
4191 }
4192 }
4193
4194 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4195 {
4196 unsigned long total, entries;
4197
4198 if (!tr)
4199 tr = &global_trace;
4200
4201 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4202
4203 return entries;
4204 }
4205
4206 unsigned long trace_total_entries(struct trace_array *tr)
4207 {
4208 unsigned long total, entries;
4209
4210 if (!tr)
4211 tr = &global_trace;
4212
4213 get_total_entries(&tr->array_buffer, &total, &entries);
4214
4215 return entries;
4216 }
4217
4218 static void print_lat_help_header(struct seq_file *m)
4219 {
4220 seq_puts(m, "# _------=> CPU# \n"
4221 "# / _-----=> irqs-off \n"
4222 "# | / _----=> need-resched \n"
4223 "# || / _---=> hardirq/softirq \n"
4224 "# ||| / _--=> preempt-depth \n"
4225 "# |||| / _-=> migrate-disable \n"
4226 "# ||||| / delay \n"
4227 "# cmd pid |||||| time | caller \n"
4228 "# \\ / |||||| \\ | / \n");
4229 }
4230
4231 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4232 {
4233 unsigned long total;
4234 unsigned long entries;
4235
4236 get_total_entries(buf, &total, &entries);
4237 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4238 entries, total, num_online_cpus());
4239 seq_puts(m, "#\n");
4240 }
4241
4242 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4243 unsigned int flags)
4244 {
4245 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4246
4247 print_event_info(buf, m);
4248
4249 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4250 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4251 }
4252
4253 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4254 unsigned int flags)
4255 {
4256 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4257 const char *space = " ";
4258 int prec = tgid ? 12 : 2;
4259
4260 print_event_info(buf, m);
4261
4262 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
4263 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4264 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4265 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4266 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4267 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4268 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4269 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4270 }
4271
4272 void
4273 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4274 {
4275 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4276 struct array_buffer *buf = iter->array_buffer;
4277 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4278 struct tracer *type = iter->trace;
4279 unsigned long entries;
4280 unsigned long total;
4281 const char *name = type->name;
4284
4285 get_total_entries(buf, &total, &entries);
4286
4287 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4288 name, UTS_RELEASE);
4289 seq_puts(m, "# -----------------------------------"
4290 "---------------------------------\n");
4291 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4292 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4293 nsecs_to_usecs(data->saved_latency),
4294 entries,
4295 total,
4296 buf->cpu,
4297 #if defined(CONFIG_PREEMPT_NONE)
4298 "server",
4299 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4300 "desktop",
4301 #elif defined(CONFIG_PREEMPT)
4302 "preempt",
4303 #elif defined(CONFIG_PREEMPT_RT)
4304 "preempt_rt",
4305 #else
4306 "unknown",
4307 #endif
4308 /* These are reserved for later use */
4309 0, 0, 0, 0);
4310 #ifdef CONFIG_SMP
4311 seq_printf(m, " #P:%d)\n", num_online_cpus());
4312 #else
4313 seq_puts(m, ")\n");
4314 #endif
4315 seq_puts(m, "# -----------------\n");
4316 seq_printf(m, "# | task: %.16s-%d "
4317 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4318 data->comm, data->pid,
4319 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4320 data->policy, data->rt_priority);
4321 seq_puts(m, "# -----------------\n");
4322
4323 if (data->critical_start) {
4324 seq_puts(m, "# => started at: ");
4325 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4326 trace_print_seq(m, &iter->seq);
4327 seq_puts(m, "\n# => ended at: ");
4328 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4329 trace_print_seq(m, &iter->seq);
4330 seq_puts(m, "\n#\n");
4331 }
4332
4333 seq_puts(m, "#\n");
4334 }
4335
4336 static void test_cpu_buff_start(struct trace_iterator *iter)
4337 {
4338 struct trace_seq *s = &iter->seq;
4339 struct trace_array *tr = iter->tr;
4340
4341 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4342 return;
4343
4344 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4345 return;
4346
4347 if (cpumask_available(iter->started) &&
4348 cpumask_test_cpu(iter->cpu, iter->started))
4349 return;
4350
4351 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4352 return;
4353
4354 if (cpumask_available(iter->started))
4355 cpumask_set_cpu(iter->cpu, iter->started);
4356
4357 /* Don't print started cpu buffer for the first entry of the trace */
4358 if (iter->idx > 1)
4359 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4360 iter->cpu);
4361 }
4362
4363 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4364 {
4365 struct trace_array *tr = iter->tr;
4366 struct trace_seq *s = &iter->seq;
4367 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4368 struct trace_entry *entry;
4369 struct trace_event *event;
4370
4371 entry = iter->ent;
4372
4373 test_cpu_buff_start(iter);
4374
4375 event = ftrace_find_event(entry->type);
4376
4377 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4378 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4379 trace_print_lat_context(iter);
4380 else
4381 trace_print_context(iter);
4382 }
4383
4384 if (trace_seq_has_overflowed(s))
4385 return TRACE_TYPE_PARTIAL_LINE;
4386
4387 if (event)
4388 return event->funcs->trace(iter, sym_flags, event);
4389
4390 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4391
4392 return trace_handle_return(s);
4393 }
4394
4395 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4396 {
4397 struct trace_array *tr = iter->tr;
4398 struct trace_seq *s = &iter->seq;
4399 struct trace_entry *entry;
4400 struct trace_event *event;
4401
4402 entry = iter->ent;
4403
4404 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4405 trace_seq_printf(s, "%d %d %llu ",
4406 entry->pid, iter->cpu, iter->ts);
4407
4408 if (trace_seq_has_overflowed(s))
4409 return TRACE_TYPE_PARTIAL_LINE;
4410
4411 event = ftrace_find_event(entry->type);
4412 if (event)
4413 return event->funcs->raw(iter, 0, event);
4414
4415 trace_seq_printf(s, "%d ?\n", entry->type);
4416
4417 return trace_handle_return(s);
4418 }
4419
4420 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4421 {
4422 struct trace_array *tr = iter->tr;
4423 struct trace_seq *s = &iter->seq;
4424 unsigned char newline = '\n';
4425 struct trace_entry *entry;
4426 struct trace_event *event;
4427
4428 entry = iter->ent;
4429
4430 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4431 SEQ_PUT_HEX_FIELD(s, entry->pid);
4432 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4433 SEQ_PUT_HEX_FIELD(s, iter->ts);
4434 if (trace_seq_has_overflowed(s))
4435 return TRACE_TYPE_PARTIAL_LINE;
4436 }
4437
4438 event = ftrace_find_event(entry->type);
4439 if (event) {
4440 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4441 if (ret != TRACE_TYPE_HANDLED)
4442 return ret;
4443 }
4444
4445 SEQ_PUT_FIELD(s, newline);
4446
4447 return trace_handle_return(s);
4448 }
4449
4450 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4451 {
4452 struct trace_array *tr = iter->tr;
4453 struct trace_seq *s = &iter->seq;
4454 struct trace_entry *entry;
4455 struct trace_event *event;
4456
4457 entry = iter->ent;
4458
4459 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4460 SEQ_PUT_FIELD(s, entry->pid);
4461 SEQ_PUT_FIELD(s, iter->cpu);
4462 SEQ_PUT_FIELD(s, iter->ts);
4463 if (trace_seq_has_overflowed(s))
4464 return TRACE_TYPE_PARTIAL_LINE;
4465 }
4466
4467 event = ftrace_find_event(entry->type);
4468 return event ? event->funcs->binary(iter, 0, event) :
4469 TRACE_TYPE_HANDLED;
4470 }
4471
4472 int trace_empty(struct trace_iterator *iter)
4473 {
4474 struct ring_buffer_iter *buf_iter;
4475 int cpu;
4476
4477 /* If we are looking at one CPU buffer, only check that one */
4478 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4479 cpu = iter->cpu_file;
4480 buf_iter = trace_buffer_iter(iter, cpu);
4481 if (buf_iter) {
4482 if (!ring_buffer_iter_empty(buf_iter))
4483 return 0;
4484 } else {
4485 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4486 return 0;
4487 }
4488 return 1;
4489 }
4490
4491 for_each_tracing_cpu(cpu) {
4492 buf_iter = trace_buffer_iter(iter, cpu);
4493 if (buf_iter) {
4494 if (!ring_buffer_iter_empty(buf_iter))
4495 return 0;
4496 } else {
4497 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4498 return 0;
4499 }
4500 }
4501
4502 return 1;
4503 }
4504
4505 /* Called with trace_event_read_lock() held. */
4506 enum print_line_t print_trace_line(struct trace_iterator *iter)
4507 {
4508 struct trace_array *tr = iter->tr;
4509 unsigned long trace_flags = tr->trace_flags;
4510 enum print_line_t ret;
4511
4512 if (iter->lost_events) {
4513 if (iter->lost_events == (unsigned long)-1)
4514 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4515 iter->cpu);
4516 else
4517 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4518 iter->cpu, iter->lost_events);
4519 if (trace_seq_has_overflowed(&iter->seq))
4520 return TRACE_TYPE_PARTIAL_LINE;
4521 }
4522
4523 if (iter->trace && iter->trace->print_line) {
4524 ret = iter->trace->print_line(iter);
4525 if (ret != TRACE_TYPE_UNHANDLED)
4526 return ret;
4527 }
4528
4529 if (iter->ent->type == TRACE_BPUTS &&
4530 trace_flags & TRACE_ITER_PRINTK &&
4531 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532 return trace_print_bputs_msg_only(iter);
4533
4534 if (iter->ent->type == TRACE_BPRINT &&
4535 trace_flags & TRACE_ITER_PRINTK &&
4536 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537 return trace_print_bprintk_msg_only(iter);
4538
4539 if (iter->ent->type == TRACE_PRINT &&
4540 trace_flags & TRACE_ITER_PRINTK &&
4541 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542 return trace_print_printk_msg_only(iter);
4543
4544 if (trace_flags & TRACE_ITER_BIN)
4545 return print_bin_fmt(iter);
4546
4547 if (trace_flags & TRACE_ITER_HEX)
4548 return print_hex_fmt(iter);
4549
4550 if (trace_flags & TRACE_ITER_RAW)
4551 return print_raw_fmt(iter);
4552
4553 return print_trace_fmt(iter);
4554 }
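/*
 * Usage sketch (an illustration, not taken from this file): the bin, hex
 * and raw branches above are selected through trace_options, e.g.
 *
 *   # echo hex > /sys/kernel/tracing/trace_options
 *
 * switches readers of the trace file to the hex output, and "raw" or
 * "bin" pick the other two formats handled here; the default is the
 * formatted output from print_trace_fmt().
 */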
4555
4556 void trace_latency_header(struct seq_file *m)
4557 {
4558 struct trace_iterator *iter = m->private;
4559 struct trace_array *tr = iter->tr;
4560
4561 /* print nothing if the buffers are empty */
4562 if (trace_empty(iter))
4563 return;
4564
4565 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4566 print_trace_header(m, iter);
4567
4568 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4569 print_lat_help_header(m);
4570 }
4571
4572 void trace_default_header(struct seq_file *m)
4573 {
4574 struct trace_iterator *iter = m->private;
4575 struct trace_array *tr = iter->tr;
4576 unsigned long trace_flags = tr->trace_flags;
4577
4578 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4579 return;
4580
4581 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4582 /* print nothing if the buffers are empty */
4583 if (trace_empty(iter))
4584 return;
4585 print_trace_header(m, iter);
4586 if (!(trace_flags & TRACE_ITER_VERBOSE))
4587 print_lat_help_header(m);
4588 } else {
4589 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4590 if (trace_flags & TRACE_ITER_IRQ_INFO)
4591 print_func_help_header_irq(iter->array_buffer,
4592 m, trace_flags);
4593 else
4594 print_func_help_header(iter->array_buffer, m,
4595 trace_flags);
4596 }
4597 }
4598 }
4599
4600 static void test_ftrace_alive(struct seq_file *m)
4601 {
4602 if (!ftrace_is_dead())
4603 return;
4604 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4605 "# MAY BE MISSING FUNCTION EVENTS\n");
4606 }
4607
4608 #ifdef CONFIG_TRACER_MAX_TRACE
4609 static void show_snapshot_main_help(struct seq_file *m)
4610 {
4611 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4612 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4613 "# Takes a snapshot of the main buffer.\n"
4614 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4615 "# (Doesn't have to be '2'; works with any number that\n"
4616 "# is not a '0' or '1')\n");
4617 }
4618
4619 static void show_snapshot_percpu_help(struct seq_file *m)
4620 {
4621 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4622 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4623 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4624 "# Takes a snapshot of the main buffer for this cpu.\n");
4625 #else
4626 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4627 "# Must use main snapshot file to allocate.\n");
4628 #endif
4629 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4630 "# (Doesn't have to be '2'; works with any number that\n"
4631 "# is not a '0' or '1')\n");
4632 }
4633
4634 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4635 {
4636 if (iter->tr->allocated_snapshot)
4637 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4638 else
4639 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4640
4641 seq_puts(m, "# Snapshot commands:\n");
4642 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4643 show_snapshot_main_help(m);
4644 else
4645 show_snapshot_percpu_help(m);
4646 }
4647 #else
4648 /* Should never be called */
4649 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4650 #endif
4651
4652 static int s_show(struct seq_file *m, void *v)
4653 {
4654 struct trace_iterator *iter = v;
4655 int ret;
4656
4657 if (iter->ent == NULL) {
4658 if (iter->tr) {
4659 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4660 seq_puts(m, "#\n");
4661 test_ftrace_alive(m);
4662 }
4663 if (iter->snapshot && trace_empty(iter))
4664 print_snapshot_help(m, iter);
4665 else if (iter->trace && iter->trace->print_header)
4666 iter->trace->print_header(m);
4667 else
4668 trace_default_header(m);
4669
4670 } else if (iter->leftover) {
4671 /*
4672 * If we filled the seq_file buffer earlier, we
4673 * want to just show it now.
4674 */
4675 ret = trace_print_seq(m, &iter->seq);
4676
4677 /* ret should this time be zero, but you never know */
4678 iter->leftover = ret;
4679
4680 } else {
4681 print_trace_line(iter);
4682 ret = trace_print_seq(m, &iter->seq);
4683 /*
4684 * If we overflow the seq_file buffer, then it will
4685 * ask us for this data again at start up.
4686 * Use that instead.
4687 * ret is 0 if seq_file write succeeded.
4688 * -1 otherwise.
4689 */
4690 iter->leftover = ret;
4691 }
4692
4693 return 0;
4694 }
4695
4696 /*
4697 * Should be used after trace_array_get(); trace_types_lock
4698 * ensures that i_cdev was already initialized.
4699 */
4700 static inline int tracing_get_cpu(struct inode *inode)
4701 {
4702 if (inode->i_cdev) /* See trace_create_cpu_file() */
4703 return (long)inode->i_cdev - 1;
4704 return RING_BUFFER_ALL_CPUS;
4705 }
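/*
 * Sketch of the encoding assumed above: the per-cpu file creation side
 * (trace_create_cpu_file()) is expected to store "cpu + 1" in i_cdev,
 * so a NULL i_cdev decodes to RING_BUFFER_ALL_CPUS while, for example,
 * i_cdev == (void *)3 decodes to CPU 2.
 */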
4706
4707 static const struct seq_operations tracer_seq_ops = {
4708 .start = s_start,
4709 .next = s_next,
4710 .stop = s_stop,
4711 .show = s_show,
4712 };
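/*
 * A rough sketch (not a strict guarantee of call counts) of how reads of
 * the "trace" file drive these callbacks through the seq_file core:
 *
 *   s_start() -> s_show() -> s_next() -> s_show() -> ... -> s_stop()
 *
 * with s_start()/s_stop() also taking and releasing the event read lock
 * and the per-cpu access lock around each chunk of output.
 */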
4713
4714 static struct trace_iterator *
4715 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4716 {
4717 struct trace_array *tr = inode->i_private;
4718 struct trace_iterator *iter;
4719 int cpu;
4720
4721 if (tracing_disabled)
4722 return ERR_PTR(-ENODEV);
4723
4724 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4725 if (!iter)
4726 return ERR_PTR(-ENOMEM);
4727
4728 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4729 GFP_KERNEL);
4730 if (!iter->buffer_iter)
4731 goto release;
4732
4733 /*
4734 * trace_find_next_entry() may need to save off iter->ent.
4735 * It will place it into the iter->temp buffer. As most
4736 * events are smaller than 128 bytes, allocate a buffer of that size.
4737 * If one is greater, then trace_find_next_entry() will
4738 * allocate a new buffer to adjust for the bigger iter->ent.
4739 * It's not critical if it fails to get allocated here.
4740 */
4741 iter->temp = kmalloc(128, GFP_KERNEL);
4742 if (iter->temp)
4743 iter->temp_size = 128;
4744
4745 /*
4746 * trace_event_printf() may need to modify the given format
4747 * string to replace %p with %px so that it shows the real address
4748 * instead of a hash value. However, that is only needed for event
4749 * tracing; other tracers may not need it. Defer the allocation
4750 * until it is needed.
4751 */
4752 iter->fmt = NULL;
4753 iter->fmt_size = 0;
4754
4755 /*
4756 * We make a copy of the current tracer to avoid concurrent
4757 * changes on it while we are reading.
4758 */
4759 mutex_lock(&trace_types_lock);
4760 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4761 if (!iter->trace)
4762 goto fail;
4763
4764 *iter->trace = *tr->current_trace;
4765
4766 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4767 goto fail;
4768
4769 iter->tr = tr;
4770
4771 #ifdef CONFIG_TRACER_MAX_TRACE
4772 /* Currently only the top directory has a snapshot */
4773 if (tr->current_trace->print_max || snapshot)
4774 iter->array_buffer = &tr->max_buffer;
4775 else
4776 #endif
4777 iter->array_buffer = &tr->array_buffer;
4778 iter->snapshot = snapshot;
4779 iter->pos = -1;
4780 iter->cpu_file = tracing_get_cpu(inode);
4781 mutex_init(&iter->mutex);
4782
4783 /* Notify the tracer early; before we stop tracing. */
4784 if (iter->trace->open)
4785 iter->trace->open(iter);
4786
4787 /* Annotate start of buffers if we had overruns */
4788 if (ring_buffer_overruns(iter->array_buffer->buffer))
4789 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4790
4791 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4792 if (trace_clocks[tr->clock_id].in_ns)
4793 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4794
4795 /*
4796 * If pause-on-trace is enabled, then stop the trace while
4797 * dumping, unless this is the "snapshot" file
4798 */
4799 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4800 tracing_stop_tr(tr);
4801
4802 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4803 for_each_tracing_cpu(cpu) {
4804 iter->buffer_iter[cpu] =
4805 ring_buffer_read_prepare(iter->array_buffer->buffer,
4806 cpu, GFP_KERNEL);
4807 }
4808 ring_buffer_read_prepare_sync();
4809 for_each_tracing_cpu(cpu) {
4810 ring_buffer_read_start(iter->buffer_iter[cpu]);
4811 tracing_iter_reset(iter, cpu);
4812 }
4813 } else {
4814 cpu = iter->cpu_file;
4815 iter->buffer_iter[cpu] =
4816 ring_buffer_read_prepare(iter->array_buffer->buffer,
4817 cpu, GFP_KERNEL);
4818 ring_buffer_read_prepare_sync();
4819 ring_buffer_read_start(iter->buffer_iter[cpu]);
4820 tracing_iter_reset(iter, cpu);
4821 }
4822
4823 mutex_unlock(&trace_types_lock);
4824
4825 return iter;
4826
4827 fail:
4828 mutex_unlock(&trace_types_lock);
4829 kfree(iter->trace);
4830 kfree(iter->temp);
4831 kfree(iter->buffer_iter);
4832 release:
4833 seq_release_private(inode, file);
4834 return ERR_PTR(-ENOMEM);
4835 }
4836
4837 int tracing_open_generic(struct inode *inode, struct file *filp)
4838 {
4839 int ret;
4840
4841 ret = tracing_check_open_get_tr(NULL);
4842 if (ret)
4843 return ret;
4844
4845 filp->private_data = inode->i_private;
4846 return 0;
4847 }
4848
4849 bool tracing_is_disabled(void)
4850 {
4851 return tracing_disabled;
4852 }
4853
4854 /*
4855 * Open and update trace_array ref count.
4856 * Must have the current trace_array passed to it.
4857 */
4858 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4859 {
4860 struct trace_array *tr = inode->i_private;
4861 int ret;
4862
4863 ret = tracing_check_open_get_tr(tr);
4864 if (ret)
4865 return ret;
4866
4867 filp->private_data = inode->i_private;
4868
4869 return 0;
4870 }
4871
4872 static int tracing_release(struct inode *inode, struct file *file)
4873 {
4874 struct trace_array *tr = inode->i_private;
4875 struct seq_file *m = file->private_data;
4876 struct trace_iterator *iter;
4877 int cpu;
4878
4879 if (!(file->f_mode & FMODE_READ)) {
4880 trace_array_put(tr);
4881 return 0;
4882 }
4883
4884 /* Writes do not use seq_file */
4885 iter = m->private;
4886 mutex_lock(&trace_types_lock);
4887
4888 for_each_tracing_cpu(cpu) {
4889 if (iter->buffer_iter[cpu])
4890 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4891 }
4892
4893 if (iter->trace && iter->trace->close)
4894 iter->trace->close(iter);
4895
4896 if (!iter->snapshot && tr->stop_count)
4897 /* reenable tracing if it was previously enabled */
4898 tracing_start_tr(tr);
4899
4900 __trace_array_put(tr);
4901
4902 mutex_unlock(&trace_types_lock);
4903
4904 mutex_destroy(&iter->mutex);
4905 free_cpumask_var(iter->started);
4906 kfree(iter->fmt);
4907 kfree(iter->temp);
4908 kfree(iter->trace);
4909 kfree(iter->buffer_iter);
4910 seq_release_private(inode, file);
4911
4912 return 0;
4913 }
4914
4915 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4916 {
4917 struct trace_array *tr = inode->i_private;
4918
4919 trace_array_put(tr);
4920 return 0;
4921 }
4922
4923 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4924 {
4925 struct trace_array *tr = inode->i_private;
4926
4927 trace_array_put(tr);
4928
4929 return single_release(inode, file);
4930 }
4931
4932 static int tracing_open(struct inode *inode, struct file *file)
4933 {
4934 struct trace_array *tr = inode->i_private;
4935 struct trace_iterator *iter;
4936 int ret;
4937
4938 ret = tracing_check_open_get_tr(tr);
4939 if (ret)
4940 return ret;
4941
4942 /* If this file was open for write, then erase contents */
4943 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4944 int cpu = tracing_get_cpu(inode);
4945 struct array_buffer *trace_buf = &tr->array_buffer;
4946
4947 #ifdef CONFIG_TRACER_MAX_TRACE
4948 if (tr->current_trace->print_max)
4949 trace_buf = &tr->max_buffer;
4950 #endif
4951
4952 if (cpu == RING_BUFFER_ALL_CPUS)
4953 tracing_reset_online_cpus(trace_buf);
4954 else
4955 tracing_reset_cpu(trace_buf, cpu);
4956 }
4957
4958 if (file->f_mode & FMODE_READ) {
4959 iter = __tracing_open(inode, file, false);
4960 if (IS_ERR(iter))
4961 ret = PTR_ERR(iter);
4962 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4963 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4964 }
4965
4966 if (ret < 0)
4967 trace_array_put(tr);
4968
4969 return ret;
4970 }
4971
4972 /*
4973 * Some tracers are not suitable for instance buffers.
4974 * A tracer is always available for the global array (toplevel);
4975 * for an instance it must explicitly state that it is allowed.
4976 */
4977 static bool
4978 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4979 {
4980 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4981 }
4982
4983 /* Find the next tracer that this trace array may use */
4984 static struct tracer *
4985 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4986 {
4987 while (t && !trace_ok_for_array(t, tr))
4988 t = t->next;
4989
4990 return t;
4991 }
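/*
 * Illustration (an assumption for clarity, not taken from this file):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # cat /sys/kernel/tracing/instances/foo/available_tracers
 *
 * should list only tracers that set ->allow_instances, which is what the
 * two helpers above enforce for non-global trace arrays.
 */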
4992
4993 static void *
4994 t_next(struct seq_file *m, void *v, loff_t *pos)
4995 {
4996 struct trace_array *tr = m->private;
4997 struct tracer *t = v;
4998
4999 (*pos)++;
5000
5001 if (t)
5002 t = get_tracer_for_array(tr, t->next);
5003
5004 return t;
5005 }
5006
5007 static void *t_start(struct seq_file *m, loff_t *pos)
5008 {
5009 struct trace_array *tr = m->private;
5010 struct tracer *t;
5011 loff_t l = 0;
5012
5013 mutex_lock(&trace_types_lock);
5014
5015 t = get_tracer_for_array(tr, trace_types);
5016 for (; t && l < *pos; t = t_next(m, t, &l))
5017 ;
5018
5019 return t;
5020 }
5021
5022 static void t_stop(struct seq_file *m, void *p)
5023 {
5024 mutex_unlock(&trace_types_lock);
5025 }
5026
5027 static int t_show(struct seq_file *m, void *v)
5028 {
5029 struct tracer *t = v;
5030
5031 if (!t)
5032 return 0;
5033
5034 seq_puts(m, t->name);
5035 if (t->next)
5036 seq_putc(m, ' ');
5037 else
5038 seq_putc(m, '\n');
5039
5040 return 0;
5041 }
5042
5043 static const struct seq_operations show_traces_seq_ops = {
5044 .start = t_start,
5045 .next = t_next,
5046 .stop = t_stop,
5047 .show = t_show,
5048 };
5049
5050 static int show_traces_open(struct inode *inode, struct file *file)
5051 {
5052 struct trace_array *tr = inode->i_private;
5053 struct seq_file *m;
5054 int ret;
5055
5056 ret = tracing_check_open_get_tr(tr);
5057 if (ret)
5058 return ret;
5059
5060 ret = seq_open(file, &show_traces_seq_ops);
5061 if (ret) {
5062 trace_array_put(tr);
5063 return ret;
5064 }
5065
5066 m = file->private_data;
5067 m->private = tr;
5068
5069 return 0;
5070 }
5071
5072 static int show_traces_release(struct inode *inode, struct file *file)
5073 {
5074 struct trace_array *tr = inode->i_private;
5075
5076 trace_array_put(tr);
5077 return seq_release(inode, file);
5078 }
5079
5080 static ssize_t
5081 tracing_write_stub(struct file *filp, const char __user *ubuf,
5082 size_t count, loff_t *ppos)
5083 {
5084 return count;
5085 }
5086
5087 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5088 {
5089 int ret;
5090
5091 if (file->f_mode & FMODE_READ)
5092 ret = seq_lseek(file, offset, whence);
5093 else
5094 file->f_pos = ret = 0;
5095
5096 return ret;
5097 }
5098
5099 static const struct file_operations tracing_fops = {
5100 .open = tracing_open,
5101 .read = seq_read,
5102 .write = tracing_write_stub,
5103 .llseek = tracing_lseek,
5104 .release = tracing_release,
5105 };
5106
5107 static const struct file_operations show_traces_fops = {
5108 .open = show_traces_open,
5109 .read = seq_read,
5110 .llseek = seq_lseek,
5111 .release = show_traces_release,
5112 };
5113
5114 static ssize_t
5115 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5116 size_t count, loff_t *ppos)
5117 {
5118 struct trace_array *tr = file_inode(filp)->i_private;
5119 char *mask_str;
5120 int len;
5121
5122 len = snprintf(NULL, 0, "%*pb\n",
5123 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5124 mask_str = kmalloc(len, GFP_KERNEL);
5125 if (!mask_str)
5126 return -ENOMEM;
5127
5128 len = snprintf(mask_str, len, "%*pb\n",
5129 cpumask_pr_args(tr->tracing_cpumask));
5130 if (len >= count) {
5131 count = -EINVAL;
5132 goto out_err;
5133 }
5134 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5135
5136 out_err:
5137 kfree(mask_str);
5138
5139 return count;
5140 }
5141
5142 int tracing_set_cpumask(struct trace_array *tr,
5143 cpumask_var_t tracing_cpumask_new)
5144 {
5145 int cpu;
5146
5147 if (!tr)
5148 return -EINVAL;
5149
5150 local_irq_disable();
5151 arch_spin_lock(&tr->max_lock);
5152 for_each_tracing_cpu(cpu) {
5153 /*
5154 * Increase/decrease the disabled counter if we are
5155 * about to flip a bit in the cpumask:
5156 */
5157 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5158 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5159 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5160 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5161 }
5162 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5163 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5164 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5165 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5166 }
5167 }
5168 arch_spin_unlock(&tr->max_lock);
5169 local_irq_enable();
5170
5171 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5172
5173 return 0;
5174 }
5175
5176 static ssize_t
5177 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5178 size_t count, loff_t *ppos)
5179 {
5180 struct trace_array *tr = file_inode(filp)->i_private;
5181 cpumask_var_t tracing_cpumask_new;
5182 int err;
5183
5184 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5185 return -ENOMEM;
5186
5187 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5188 if (err)
5189 goto err_free;
5190
5191 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5192 if (err)
5193 goto err_free;
5194
5195 free_cpumask_var(tracing_cpumask_new);
5196
5197 return count;
5198
5199 err_free:
5200 free_cpumask_var(tracing_cpumask_new);
5201
5202 return err;
5203 }
5204
5205 static const struct file_operations tracing_cpumask_fops = {
5206 .open = tracing_open_generic_tr,
5207 .read = tracing_cpumask_read,
5208 .write = tracing_cpumask_write,
5209 .release = tracing_release_generic_tr,
5210 .llseek = generic_file_llseek,
5211 };
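/*
 * Usage sketch (hedged, not part of this file): tracing_cpumask takes a
 * hex cpumask, so for example
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1, and reading the file back prints the
 * mask via the "%*pb" format used above.
 */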
5212
5213 static int tracing_trace_options_show(struct seq_file *m, void *v)
5214 {
5215 struct tracer_opt *trace_opts;
5216 struct trace_array *tr = m->private;
5217 u32 tracer_flags;
5218 int i;
5219
5220 mutex_lock(&trace_types_lock);
5221 tracer_flags = tr->current_trace->flags->val;
5222 trace_opts = tr->current_trace->flags->opts;
5223
5224 for (i = 0; trace_options[i]; i++) {
5225 if (tr->trace_flags & (1 << i))
5226 seq_printf(m, "%s\n", trace_options[i]);
5227 else
5228 seq_printf(m, "no%s\n", trace_options[i]);
5229 }
5230
5231 for (i = 0; trace_opts[i].name; i++) {
5232 if (tracer_flags & trace_opts[i].bit)
5233 seq_printf(m, "%s\n", trace_opts[i].name);
5234 else
5235 seq_printf(m, "no%s\n", trace_opts[i].name);
5236 }
5237 mutex_unlock(&trace_types_lock);
5238
5239 return 0;
5240 }
5241
5242 static int __set_tracer_option(struct trace_array *tr,
5243 struct tracer_flags *tracer_flags,
5244 struct tracer_opt *opts, int neg)
5245 {
5246 struct tracer *trace = tracer_flags->trace;
5247 int ret;
5248
5249 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5250 if (ret)
5251 return ret;
5252
5253 if (neg)
5254 tracer_flags->val &= ~opts->bit;
5255 else
5256 tracer_flags->val |= opts->bit;
5257 return 0;
5258 }
5259
5260 /* Try to assign a tracer specific option */
5261 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5262 {
5263 struct tracer *trace = tr->current_trace;
5264 struct tracer_flags *tracer_flags = trace->flags;
5265 struct tracer_opt *opts = NULL;
5266 int i;
5267
5268 for (i = 0; tracer_flags->opts[i].name; i++) {
5269 opts = &tracer_flags->opts[i];
5270
5271 if (strcmp(cmp, opts->name) == 0)
5272 return __set_tracer_option(tr, trace->flags, opts, neg);
5273 }
5274
5275 return -EINVAL;
5276 }
5277
5278 /* Some tracers require overwrite to stay enabled */
5279 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5280 {
5281 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5282 return -1;
5283
5284 return 0;
5285 }
5286
5287 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5288 {
5289 int *map;
5290
5291 if ((mask == TRACE_ITER_RECORD_TGID) ||
5292 (mask == TRACE_ITER_RECORD_CMD))
5293 lockdep_assert_held(&event_mutex);
5294
5295 /* do nothing if flag is already set */
5296 if (!!(tr->trace_flags & mask) == !!enabled)
5297 return 0;
5298
5299 /* Give the tracer a chance to approve the change */
5300 if (tr->current_trace->flag_changed)
5301 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5302 return -EINVAL;
5303
5304 if (enabled)
5305 tr->trace_flags |= mask;
5306 else
5307 tr->trace_flags &= ~mask;
5308
5309 if (mask == TRACE_ITER_RECORD_CMD)
5310 trace_event_enable_cmd_record(enabled);
5311
5312 if (mask == TRACE_ITER_RECORD_TGID) {
5313 if (!tgid_map) {
5314 tgid_map_max = pid_max;
5315 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5316 GFP_KERNEL);
5317
5318 /*
5319 * Pairs with smp_load_acquire() in
5320 * trace_find_tgid_ptr() to ensure that if it observes
5321 * the tgid_map we just allocated then it also observes
5322 * the corresponding tgid_map_max value.
5323 */
5324 smp_store_release(&tgid_map, map);
5325 }
5326 if (!tgid_map) {
5327 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5328 return -ENOMEM;
5329 }
5330
5331 trace_event_enable_tgid_record(enabled);
5332 }
5333
5334 if (mask == TRACE_ITER_EVENT_FORK)
5335 trace_event_follow_fork(tr, enabled);
5336
5337 if (mask == TRACE_ITER_FUNC_FORK)
5338 ftrace_pid_follow_fork(tr, enabled);
5339
5340 if (mask == TRACE_ITER_OVERWRITE) {
5341 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5342 #ifdef CONFIG_TRACER_MAX_TRACE
5343 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5344 #endif
5345 }
5346
5347 if (mask == TRACE_ITER_PRINTK) {
5348 trace_printk_start_stop_comm(enabled);
5349 trace_printk_control(enabled);
5350 }
5351
5352 return 0;
5353 }
5354
5355 int trace_set_options(struct trace_array *tr, char *option)
5356 {
5357 char *cmp;
5358 int neg = 0;
5359 int ret;
5360 size_t orig_len = strlen(option);
5361 int len;
5362
5363 cmp = strstrip(option);
5364
5365 len = str_has_prefix(cmp, "no");
5366 if (len)
5367 neg = 1;
5368
5369 cmp += len;
5370
5371 mutex_lock(&event_mutex);
5372 mutex_lock(&trace_types_lock);
5373
5374 ret = match_string(trace_options, -1, cmp);
5375 /* If no option could be set, test the specific tracer options */
5376 if (ret < 0)
5377 ret = set_tracer_option(tr, cmp, neg);
5378 else
5379 ret = set_tracer_flag(tr, 1 << ret, !neg);
5380
5381 mutex_unlock(&trace_types_lock);
5382 mutex_unlock(&event_mutex);
5383
5384 /*
5385 * If the first trailing whitespace is replaced with '\0' by strstrip,
5386 * turn it back into a space.
5387 */
5388 if (orig_len > strlen(option))
5389 option[strlen(option)] = ' ';
5390
5391 return ret;
5392 }
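/*
 * Usage sketch (an illustration, not part of this file): the parsing
 * above is what makes writes to the trace_options file toggle flags,
 * e.g.
 *
 *   # echo sym-offset > /sys/kernel/tracing/trace_options
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 *
 * where a "no" prefix clears the named option and its absence sets it.
 */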
5393
5394 static void __init apply_trace_boot_options(void)
5395 {
5396 char *buf = trace_boot_options_buf;
5397 char *option;
5398
5399 while (true) {
5400 option = strsep(&buf, ",");
5401
5402 if (!option)
5403 break;
5404
5405 if (*option)
5406 trace_set_options(&global_trace, option);
5407
5408 /* Put back the comma to allow this to be called again */
5409 if (buf)
5410 *(buf - 1) = ',';
5411 }
5412 }
5413
5414 static ssize_t
5415 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5416 size_t cnt, loff_t *ppos)
5417 {
5418 struct seq_file *m = filp->private_data;
5419 struct trace_array *tr = m->private;
5420 char buf[64];
5421 int ret;
5422
5423 if (cnt >= sizeof(buf))
5424 return -EINVAL;
5425
5426 if (copy_from_user(buf, ubuf, cnt))
5427 return -EFAULT;
5428
5429 buf[cnt] = 0;
5430
5431 ret = trace_set_options(tr, buf);
5432 if (ret < 0)
5433 return ret;
5434
5435 *ppos += cnt;
5436
5437 return cnt;
5438 }
5439
5440 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5441 {
5442 struct trace_array *tr = inode->i_private;
5443 int ret;
5444
5445 ret = tracing_check_open_get_tr(tr);
5446 if (ret)
5447 return ret;
5448
5449 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5450 if (ret < 0)
5451 trace_array_put(tr);
5452
5453 return ret;
5454 }
5455
5456 static const struct file_operations tracing_iter_fops = {
5457 .open = tracing_trace_options_open,
5458 .read = seq_read,
5459 .llseek = seq_lseek,
5460 .release = tracing_single_release_tr,
5461 .write = tracing_trace_options_write,
5462 };
5463
5464 static const char readme_msg[] =
5465 "tracing mini-HOWTO:\n\n"
5466 "# echo 0 > tracing_on : quick way to disable tracing\n"
5467 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5468 " Important files:\n"
5469 " trace\t\t\t- The static contents of the buffer\n"
5470 "\t\t\t To clear the buffer, write into this file: echo > trace\n"
5471 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5472 " current_tracer\t- function and latency tracers\n"
5473 " available_tracers\t- list of configured tracers for current_tracer\n"
5474 " error_log\t- error log for failed commands (that support it)\n"
5475 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5476 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5477 " trace_clock\t\t- change the clock used to order events\n"
5478 " local: Per cpu clock but may not be synced across CPUs\n"
5479 " global: Synced across CPUs but slows tracing down.\n"
5480 " counter: Not a clock, but just an increment\n"
5481 " uptime: Jiffy counter from time of boot\n"
5482 " perf: Same clock that perf events use\n"
5483 #ifdef CONFIG_X86_64
5484 " x86-tsc: TSC cycle counter\n"
5485 #endif
5486 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5487 " delta: Delta difference against a buffer-wide timestamp\n"
5488 " absolute: Absolute (standalone) timestamp\n"
5489 "\n trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5490 "\n trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5491 " tracing_cpumask\t- Limit which CPUs to trace\n"
5492 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5493 "\t\t\t Remove sub-buffer with rmdir\n"
5494 " trace_options\t\t- Set format or modify how tracing happens\n"
5495 "\t\t\t Disable an option by prefixing 'no' to the\n"
5496 "\t\t\t option name\n"
5497 " saved_cmdlines_size\t- echo a number in here to set the size of the stored comm-pid list\n"
5498 #ifdef CONFIG_DYNAMIC_FTRACE
5499 "\n available_filter_functions - list of functions that can be filtered on\n"
5500 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5501 "\t\t\t functions\n"
5502 "\t accepts: func_full_name or glob-matching-pattern\n"
5503 "\t modules: Can select a group via module\n"
5504 "\t Format: :mod:<module-name>\n"
5505 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5506 "\t triggers: a command to perform when function is hit\n"
5507 "\t Format: <function>:<trigger>[:count]\n"
5508 "\t trigger: traceon, traceoff\n"
5509 "\t\t enable_event:<system>:<event>\n"
5510 "\t\t disable_event:<system>:<event>\n"
5511 #ifdef CONFIG_STACKTRACE
5512 "\t\t stacktrace\n"
5513 #endif
5514 #ifdef CONFIG_TRACER_SNAPSHOT
5515 "\t\t snapshot\n"
5516 #endif
5517 "\t\t dump\n"
5518 "\t\t cpudump\n"
5519 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5520 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5521 "\t The first one will disable tracing every time do_fault is hit\n"
5522 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5523 "\t The first time do_trap is hit and it disables tracing, the\n"
5524 "\t counter will decrement to 2. If tracing is already disabled,\n"
5525 "\t the counter will not decrement. It only decrements when the\n"
5526 "\t trigger did work\n"
5527 "\t To remove trigger without count:\n"
5528 "\t echo '!<function>:<trigger>' > set_ftrace_filter\n"
5529 "\t To remove trigger with a count:\n"
5530 "\t echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5531 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5532 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5533 "\t modules: Can select a group via module command :mod:\n"
5534 "\t Does not accept triggers\n"
5535 #endif /* CONFIG_DYNAMIC_FTRACE */
5536 #ifdef CONFIG_FUNCTION_TRACER
5537 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5538 "\t\t (function)\n"
5539 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5540 "\t\t (function)\n"
5541 #endif
5542 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5543 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5544 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5545 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5546 #endif
5547 #ifdef CONFIG_TRACER_SNAPSHOT
5548 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5549 "\t\t\t snapshot buffer. Read the contents for more\n"
5550 "\t\t\t information\n"
5551 #endif
5552 #ifdef CONFIG_STACK_TRACER
5553 " stack_trace\t\t- Shows the max stack trace when active\n"
5554 " stack_max_size\t- Shows current max stack size that was traced\n"
5555 "\t\t\t Write into this file to reset the max size (trigger a\n"
5556 "\t\t\t new trace)\n"
5557 #ifdef CONFIG_DYNAMIC_FTRACE
5558 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5559 "\t\t\t traces\n"
5560 #endif
5561 #endif /* CONFIG_STACK_TRACER */
5562 #ifdef CONFIG_DYNAMIC_EVENTS
5563 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5564 "\t\t\t Write into this file to define/undefine new trace events.\n"
5565 #endif
5566 #ifdef CONFIG_KPROBE_EVENTS
5567 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5568 "\t\t\t Write into this file to define/undefine new trace events.\n"
5569 #endif
5570 #ifdef CONFIG_UPROBE_EVENTS
5571 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5572 "\t\t\t Write into this file to define/undefine new trace events.\n"
5573 #endif
5574 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5575 "\t accepts: event-definitions (one definition per line)\n"
5576 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5577 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 "\t s:[synthetic/]<event> <field> [<field>]\n"
5580 #endif
5581 "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5582 "\t -:[<group>/]<event>\n"
5583 #ifdef CONFIG_KPROBE_EVENTS
5584 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5585 "\t place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5586 #endif
5587 #ifdef CONFIG_UPROBE_EVENTS
5588 "\t place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5589 #endif
5590 "\t args: <name>=fetcharg[:type]\n"
5591 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5592 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5593 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5594 #else
5595 "\t $stack<index>, $stack, $retval, $comm,\n"
5596 #endif
5597 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5598 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5599 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5600 "\t <type>\\[<array-size>\\]\n"
5601 #ifdef CONFIG_HIST_TRIGGERS
5602 "\t field: <stype> <name>;\n"
5603 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5604 "\t [unsigned] char/int/long\n"
5605 #endif
5606 "\t efield: For event probes ('e' types), the field is one of the fields\n"
5607 "\t of the <attached-group>/<attached-event>.\n"
5608 #endif
5609 " events/\t\t- Directory containing all trace event subsystems:\n"
5610 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5611 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5612 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5613 "\t\t\t events\n"
5614 " filter\t\t- If set, only events passing filter are traced\n"
5615 " events/<system>/<event>/\t- Directory containing control files for\n"
5616 "\t\t\t <event>:\n"
5617 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5618 " filter\t\t- If set, only events passing filter are traced\n"
5619 " trigger\t\t- If set, a command to perform when event is hit\n"
5620 "\t Format: <trigger>[:count][if <filter>]\n"
5621 "\t trigger: traceon, traceoff\n"
5622 "\t enable_event:<system>:<event>\n"
5623 "\t disable_event:<system>:<event>\n"
5624 #ifdef CONFIG_HIST_TRIGGERS
5625 "\t enable_hist:<system>:<event>\n"
5626 "\t disable_hist:<system>:<event>\n"
5627 #endif
5628 #ifdef CONFIG_STACKTRACE
5629 "\t\t stacktrace\n"
5630 #endif
5631 #ifdef CONFIG_TRACER_SNAPSHOT
5632 "\t\t snapshot\n"
5633 #endif
5634 #ifdef CONFIG_HIST_TRIGGERS
5635 "\t\t hist (see below)\n"
5636 #endif
5637 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5638 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5639 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5640 "\t events/block/block_unplug/trigger\n"
5641 "\t The first disables tracing every time block_unplug is hit.\n"
5642 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5643 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5644 "\t is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5645 "\t Like function triggers, the counter is only decremented if it\n"
5646 "\t enabled or disabled tracing.\n"
5647 "\t To remove a trigger without a count:\n"
5648 "\t echo '!<trigger>' > <system>/<event>/trigger\n"
5649 "\t To remove a trigger with a count:\n"
5650 "\t echo '!<trigger>:0' > <system>/<event>/trigger\n"
5651 "\t Filters can be ignored when removing a trigger.\n"
5652 #ifdef CONFIG_HIST_TRIGGERS
5653 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5654 "\t Format: hist:keys=<field1[,field2,...]>\n"
5655 "\t [:values=<field1[,field2,...]>]\n"
5656 "\t [:sort=<field1[,field2,...]>]\n"
5657 "\t [:size=#entries]\n"
5658 "\t [:pause][:continue][:clear]\n"
5659 "\t [:name=histname1]\n"
5660 "\t [:<handler>.<action>]\n"
5661 "\t [if <filter>]\n\n"
5662 "\t Note, special fields can be used as well:\n"
5663 "\t common_timestamp - to record current timestamp\n"
5664 "\t common_cpu - to record the CPU the event happened on\n"
5665 "\n"
5666 "\t When a matching event is hit, an entry is added to a hash\n"
5667 "\t table using the key(s) and value(s) named, and the value of a\n"
5668 "\t sum called 'hitcount' is incremented. Keys and values\n"
5669 "\t correspond to fields in the event's format description. Keys\n"
5670 "\t can be any field, or the special string 'stacktrace'.\n"
5671 "\t Compound keys consisting of up to two fields can be specified\n"
5672 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5673 "\t fields. Sort keys consisting of up to two fields can be\n"
5674 "\t specified using the 'sort' keyword. The sort direction can\n"
5675 "\t be modified by appending '.descending' or '.ascending' to a\n"
5676 "\t sort field. The 'size' parameter can be used to specify more\n"
5677 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5678 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5679 "\t its histogram data will be shared with other triggers of the\n"
5680 "\t same name, and trigger hits will update this common data.\n\n"
5681 "\t Reading the 'hist' file for the event will dump the hash\n"
5682 "\t table in its entirety to stdout. If there are multiple hist\n"
5683 "\t triggers attached to an event, there will be a table for each\n"
5684 "\t trigger in the output. The table displayed for a named\n"
5685 "\t trigger will be the same as any other instance having the\n"
5686 "\t same name. The default format used to display a given field\n"
5687 "\t can be modified by appending any of the following modifiers\n"
5688 "\t to the field name, as applicable:\n\n"
5689 "\t .hex display a number as a hex value\n"
5690 "\t .sym display an address as a symbol\n"
5691 "\t .sym-offset display an address as a symbol and offset\n"
5692 "\t .execname display a common_pid as a program name\n"
5693 "\t .syscall display a syscall id as a syscall name\n"
5694 "\t .log2 display log2 value rather than raw number\n"
5695 "\t .buckets=size display values in groups of size rather than raw number\n"
5696 "\t .usecs display a common_timestamp in microseconds\n\n"
5697 "\t The 'pause' parameter can be used to pause an existing hist\n"
5698 "\t trigger or to start a hist trigger but not log any events\n"
5699 "\t until told to do so. 'continue' can be used to start or\n"
5700 "\t restart a paused hist trigger.\n\n"
5701 "\t The 'clear' parameter will clear the contents of a running\n"
5702 "\t hist trigger and leave its current paused/active state\n"
5703 "\t unchanged.\n\n"
5704 "\t The enable_hist and disable_hist triggers can be used to\n"
5705 "\t have one event conditionally start and stop another event's\n"
5706 "\t already-attached hist trigger. The syntax is analogous to\n"
5707 "\t the enable_event and disable_event triggers.\n\n"
5708 "\t Hist trigger handlers and actions are executed whenever a\n"
5709 "\t histogram entry is added or updated. They take the form:\n\n"
5710 "\t <handler>.<action>\n\n"
5711 "\t The available handlers are:\n\n"
5712 "\t onmatch(matching.event) - invoke on addition or update\n"
5713 "\t onmax(var) - invoke if var exceeds current max\n"
5714 "\t onchange(var) - invoke action if var changes\n\n"
5715 "\t The available actions are:\n\n"
5716 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5717 "\t save(field,...) - save current event fields\n"
5718 #ifdef CONFIG_TRACER_SNAPSHOT
5719 "\t snapshot() - snapshot the trace buffer\n\n"
5720 #endif
5721 #ifdef CONFIG_SYNTH_EVENTS
5722 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5723 "\t Write into this file to define/undefine new synthetic events.\n"
5724 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5725 #endif
5726 #endif
5727 ;
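/*
 * Worked example (hedged; the event and field names are assumptions) of
 * the hist trigger syntax documented in readme_msg above:
 *
 *   # echo 'hist:keys=common_pid.execname:vals=bytes_req:sort=hitcount' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 *
 * aggregates kmalloc requests per task and dumps the resulting table.
 */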
5728
5729 static ssize_t
5730 tracing_readme_read(struct file *filp, char __user *ubuf,
5731 size_t cnt, loff_t *ppos)
5732 {
5733 return simple_read_from_buffer(ubuf, cnt, ppos,
5734 readme_msg, strlen(readme_msg));
5735 }
5736
5737 static const struct file_operations tracing_readme_fops = {
5738 .open = tracing_open_generic,
5739 .read = tracing_readme_read,
5740 .llseek = generic_file_llseek,
5741 };
5742
5743 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5744 {
5745 int pid = ++(*pos);
5746
5747 return trace_find_tgid_ptr(pid);
5748 }
5749
5750 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5751 {
5752 int pid = *pos;
5753
5754 return trace_find_tgid_ptr(pid);
5755 }
5756
5757 static void saved_tgids_stop(struct seq_file *m, void *v)
5758 {
5759 }
5760
5761 static int saved_tgids_show(struct seq_file *m, void *v)
5762 {
5763 int *entry = (int *)v;
5764 int pid = entry - tgid_map;
5765 int tgid = *entry;
5766
5767 if (tgid == 0)
5768 return SEQ_SKIP;
5769
5770 seq_printf(m, "%d %d\n", pid, tgid);
5771 return 0;
5772 }
5773
5774 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5775 .start = saved_tgids_start,
5776 .stop = saved_tgids_stop,
5777 .next = saved_tgids_next,
5778 .show = saved_tgids_show,
5779 };
5780
5781 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5782 {
5783 int ret;
5784
5785 ret = tracing_check_open_get_tr(NULL);
5786 if (ret)
5787 return ret;
5788
5789 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5790 }
5791
5792
5793 static const struct file_operations tracing_saved_tgids_fops = {
5794 .open = tracing_saved_tgids_open,
5795 .read = seq_read,
5796 .llseek = seq_lseek,
5797 .release = seq_release,
5798 };
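/*
 * Illustrative usage (not part of the kernel source): these seq_file
 * operations typically back the tracefs "saved_tgids" file, which lists
 * the cached PID to TGID mappings one "<pid> <tgid>" pair per line, as
 * printed by saved_tgids_show() above. Assuming tracefs is mounted at
 * /sys/kernel/tracing and the record-tgid option is enabled:
 *
 *   echo 1 > /sys/kernel/tracing/options/record-tgid
 *   cat /sys/kernel/tracing/saved_tgids
 */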
5799
5800 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5801 {
5802 unsigned int *ptr = v;
5803
5804 if (*pos || m->count)
5805 ptr++;
5806
5807 (*pos)++;
5808
5809 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5810 ptr++) {
5811 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5812 continue;
5813
5814 return ptr;
5815 }
5816
5817 return NULL;
5818 }
5819
5820 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5821 {
5822 void *v;
5823 loff_t l = 0;
5824
5825 preempt_disable();
5826 arch_spin_lock(&trace_cmdline_lock);
5827
5828 v = &savedcmd->map_cmdline_to_pid[0];
5829 while (l <= *pos) {
5830 v = saved_cmdlines_next(m, v, &l);
5831 if (!v)
5832 return NULL;
5833 }
5834
5835 return v;
5836 }
5837
5838 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5839 {
5840 arch_spin_unlock(&trace_cmdline_lock);
5841 preempt_enable();
5842 }
5843
5844 static int saved_cmdlines_show(struct seq_file *m, void *v)
5845 {
5846 char buf[TASK_COMM_LEN];
5847 unsigned int *pid = v;
5848
5849 __trace_find_cmdline(*pid, buf);
5850 seq_printf(m, "%d %s\n", *pid, buf);
5851 return 0;
5852 }
5853
5854 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5855 .start = saved_cmdlines_start,
5856 .next = saved_cmdlines_next,
5857 .stop = saved_cmdlines_stop,
5858 .show = saved_cmdlines_show,
5859 };
5860
5861 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5862 {
5863 int ret;
5864
5865 ret = tracing_check_open_get_tr(NULL);
5866 if (ret)
5867 return ret;
5868
5869 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5870 }
5871
5872 static const struct file_operations tracing_saved_cmdlines_fops = {
5873 .open = tracing_saved_cmdlines_open,
5874 .read = seq_read,
5875 .llseek = seq_lseek,
5876 .release = seq_release,
5877 };
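/*
 * Illustrative usage (not part of the kernel source): the seq_file above
 * typically backs the tracefs "saved_cmdlines" file, which dumps the
 * cached "<pid> <comm>" pairs produced by saved_cmdlines_show():
 *
 *   cat /sys/kernel/tracing/saved_cmdlines
 */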
5878
5879 static ssize_t
5880 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5881 size_t cnt, loff_t *ppos)
5882 {
5883 char buf[64];
5884 int r;
5885
5886 arch_spin_lock(&trace_cmdline_lock);
5887 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5888 arch_spin_unlock(&trace_cmdline_lock);
5889
5890 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5891 }
5892
5893 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5894 {
5895 kfree(s->saved_cmdlines);
5896 kfree(s->map_cmdline_to_pid);
5897 kfree(s);
5898 }
5899
5900 static int tracing_resize_saved_cmdlines(unsigned int val)
5901 {
5902 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5903
5904 s = kmalloc(sizeof(*s), GFP_KERNEL);
5905 if (!s)
5906 return -ENOMEM;
5907
5908 if (allocate_cmdlines_buffer(val, s) < 0) {
5909 kfree(s);
5910 return -ENOMEM;
5911 }
5912
5913 arch_spin_lock(&trace_cmdline_lock);
5914 savedcmd_temp = savedcmd;
5915 savedcmd = s;
5916 arch_spin_unlock(&trace_cmdline_lock);
5917 free_saved_cmdlines_buffer(savedcmd_temp);
5918
5919 return 0;
5920 }
5921
5922 static ssize_t
5923 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5924 size_t cnt, loff_t *ppos)
5925 {
5926 unsigned long val;
5927 int ret;
5928
5929 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5930 if (ret)
5931 return ret;
5932
5933 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5934 if (!val || val > PID_MAX_DEFAULT)
5935 return -EINVAL;
5936
5937 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5938 if (ret < 0)
5939 return ret;
5940
5941 *ppos += cnt;
5942
5943 return cnt;
5944 }
5945
5946 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5947 .open = tracing_open_generic,
5948 .read = tracing_saved_cmdlines_size_read,
5949 .write = tracing_saved_cmdlines_size_write,
5950 };
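/*
 * Illustrative usage (not part of the kernel source): these fops back a
 * "saved_cmdlines_size" control file. Per the write handler above, the
 * value must be between 1 and PID_MAX_DEFAULT. Assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */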
5951
5952 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5953 static union trace_eval_map_item *
5954 update_eval_map(union trace_eval_map_item *ptr)
5955 {
5956 if (!ptr->map.eval_string) {
5957 if (ptr->tail.next) {
5958 ptr = ptr->tail.next;
5959 /* Set ptr to the next real item (skip head) */
5960 ptr++;
5961 } else
5962 return NULL;
5963 }
5964 return ptr;
5965 }
5966
5967 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5968 {
5969 union trace_eval_map_item *ptr = v;
5970
5971 /*
5972 * Paranoid! If ptr points to end, we don't want to increment past it.
5973 * This really should never happen.
5974 */
5975 (*pos)++;
5976 ptr = update_eval_map(ptr);
5977 if (WARN_ON_ONCE(!ptr))
5978 return NULL;
5979
5980 ptr++;
5981 ptr = update_eval_map(ptr);
5982
5983 return ptr;
5984 }
5985
5986 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5987 {
5988 union trace_eval_map_item *v;
5989 loff_t l = 0;
5990
5991 mutex_lock(&trace_eval_mutex);
5992
5993 v = trace_eval_maps;
5994 if (v)
5995 v++;
5996
5997 while (v && l < *pos) {
5998 v = eval_map_next(m, v, &l);
5999 }
6000
6001 return v;
6002 }
6003
6004 static void eval_map_stop(struct seq_file *m, void *v)
6005 {
6006 mutex_unlock(&trace_eval_mutex);
6007 }
6008
6009 static int eval_map_show(struct seq_file *m, void *v)
6010 {
6011 union trace_eval_map_item *ptr = v;
6012
6013 seq_printf(m, "%s %ld (%s)\n",
6014 ptr->map.eval_string, ptr->map.eval_value,
6015 ptr->map.system);
6016
6017 return 0;
6018 }
6019
6020 static const struct seq_operations tracing_eval_map_seq_ops = {
6021 .start = eval_map_start,
6022 .next = eval_map_next,
6023 .stop = eval_map_stop,
6024 .show = eval_map_show,
6025 };
6026
6027 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6028 {
6029 int ret;
6030
6031 ret = tracing_check_open_get_tr(NULL);
6032 if (ret)
6033 return ret;
6034
6035 return seq_open(filp, &tracing_eval_map_seq_ops);
6036 }
6037
6038 static const struct file_operations tracing_eval_map_fops = {
6039 .open = tracing_eval_map_open,
6040 .read = seq_read,
6041 .llseek = seq_lseek,
6042 .release = seq_release,
6043 };
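/*
 * Illustrative usage (not part of the kernel source): these fops back the
 * "eval_map" file created by trace_create_eval_file() below. Each line is
 * printed by eval_map_show() as "<name> <value> (<system>)":
 *
 *   cat /sys/kernel/tracing/eval_map
 */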
6044
6045 static inline union trace_eval_map_item *
6046 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6047 {
6048 /* Return tail of array given the head */
6049 return ptr + ptr->head.length + 1;
6050 }
6051
6052 static void
6053 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6054 int len)
6055 {
6056 struct trace_eval_map **stop;
6057 struct trace_eval_map **map;
6058 union trace_eval_map_item *map_array;
6059 union trace_eval_map_item *ptr;
6060
6061 stop = start + len;
6062
6063 /*
6064 * The trace_eval_maps contains the map plus a head and tail item,
6065 * where the head holds the module and length of array, and the
6066 * tail holds a pointer to the next list.
6067 */
6068 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6069 if (!map_array) {
6070 pr_warn("Unable to allocate trace eval mapping\n");
6071 return;
6072 }
6073
6074 mutex_lock(&trace_eval_mutex);
6075
6076 if (!trace_eval_maps)
6077 trace_eval_maps = map_array;
6078 else {
6079 ptr = trace_eval_maps;
6080 for (;;) {
6081 ptr = trace_eval_jmp_to_tail(ptr);
6082 if (!ptr->tail.next)
6083 break;
6084 ptr = ptr->tail.next;
6085
6086 }
6087 ptr->tail.next = map_array;
6088 }
6089 map_array->head.mod = mod;
6090 map_array->head.length = len;
6091 map_array++;
6092
6093 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6094 map_array->map = **map;
6095 map_array++;
6096 }
6097 memset(map_array, 0, sizeof(*map_array));
6098
6099 mutex_unlock(&trace_eval_mutex);
6100 }
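/*
 * Sketch of the array built by trace_insert_eval_map_file() above (for
 * illustration only). For a module exporting N eval maps, N + 2 items
 * are allocated: a head, the N maps, and a zeroed tail that chains to
 * the next module's array:
 *
 *   map_array[0]       head  { .mod = mod, .length = N }
 *   map_array[1..N]    map   copies of *start[0] .. *start[N-1]
 *   map_array[N+1]     tail  { .next = NULL }  (memset to 0)
 *
 * trace_eval_jmp_to_tail() uses head.length to hop from one head to the
 * following tail when walking the chain.
 */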
6101
6102 static void trace_create_eval_file(struct dentry *d_tracer)
6103 {
6104 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6105 NULL, &tracing_eval_map_fops);
6106 }
6107
6108 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6109 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6110 static inline void trace_insert_eval_map_file(struct module *mod,
6111 struct trace_eval_map **start, int len) { }
6112 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6113
6114 static void trace_insert_eval_map(struct module *mod,
6115 struct trace_eval_map **start, int len)
6116 {
6117 struct trace_eval_map **map;
6118
6119 if (len <= 0)
6120 return;
6121
6122 map = start;
6123
6124 trace_event_eval_update(map, len);
6125
6126 trace_insert_eval_map_file(mod, start, len);
6127 }
6128
6129 static ssize_t
6130 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6131 size_t cnt, loff_t *ppos)
6132 {
6133 struct trace_array *tr = filp->private_data;
6134 char buf[MAX_TRACER_SIZE+2];
6135 int r;
6136
6137 mutex_lock(&trace_types_lock);
6138 r = sprintf(buf, "%s\n", tr->current_trace->name);
6139 mutex_unlock(&trace_types_lock);
6140
6141 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6142 }
6143
6144 int tracer_init(struct tracer *t, struct trace_array *tr)
6145 {
6146 tracing_reset_online_cpus(&tr->array_buffer);
6147 return t->init(tr);
6148 }
6149
6150 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6151 {
6152 int cpu;
6153
6154 for_each_tracing_cpu(cpu)
6155 per_cpu_ptr(buf->data, cpu)->entries = val;
6156 }
6157
6158 #ifdef CONFIG_TRACER_MAX_TRACE
6159 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6160 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6161 struct array_buffer *size_buf, int cpu_id)
6162 {
6163 int cpu, ret = 0;
6164
6165 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6166 for_each_tracing_cpu(cpu) {
6167 ret = ring_buffer_resize(trace_buf->buffer,
6168 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6169 if (ret < 0)
6170 break;
6171 per_cpu_ptr(trace_buf->data, cpu)->entries =
6172 per_cpu_ptr(size_buf->data, cpu)->entries;
6173 }
6174 } else {
6175 ret = ring_buffer_resize(trace_buf->buffer,
6176 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6177 if (ret == 0)
6178 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6179 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6180 }
6181
6182 return ret;
6183 }
6184 #endif /* CONFIG_TRACER_MAX_TRACE */
6185
6186 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6187 unsigned long size, int cpu)
6188 {
6189 int ret;
6190
6191 /*
6192 * If kernel or user changes the size of the ring buffer
6193 * we use the size that was given, and we can forget about
6194 * expanding it later.
6195 */
6196 ring_buffer_expanded = true;
6197
6198 /* May be called before buffers are initialized */
6199 if (!tr->array_buffer.buffer)
6200 return 0;
6201
6202 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6203 if (ret < 0)
6204 return ret;
6205
6206 #ifdef CONFIG_TRACER_MAX_TRACE
6207 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6208 !tr->current_trace->use_max_tr)
6209 goto out;
6210
6211 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6212 if (ret < 0) {
6213 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6214 &tr->array_buffer, cpu);
6215 if (r < 0) {
6216 /*
6217 * AARGH! We are left with different
6218 * size max buffer!!!!
6219 * The max buffer is our "snapshot" buffer.
6220 * When a tracer needs a snapshot (one of the
6221 * latency tracers), it swaps the max buffer
6222 * with the saved snapshot. We succeeded to
6223 * update the size of the main buffer, but failed to
6224 * update the size of the max buffer. But when we tried
6225 * to reset the main buffer to the original size, we
6226 * failed there too. This is very unlikely to
6227 * happen, but if it does, warn and kill all
6228 * tracing.
6229 */
6230 WARN_ON(1);
6231 tracing_disabled = 1;
6232 }
6233 return ret;
6234 }
6235
6236 if (cpu == RING_BUFFER_ALL_CPUS)
6237 set_buffer_entries(&tr->max_buffer, size);
6238 else
6239 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6240
6241 out:
6242 #endif /* CONFIG_TRACER_MAX_TRACE */
6243
6244 if (cpu == RING_BUFFER_ALL_CPUS)
6245 set_buffer_entries(&tr->array_buffer, size);
6246 else
6247 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6248
6249 return ret;
6250 }
6251
6252 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6253 unsigned long size, int cpu_id)
6254 {
6255 int ret;
6256
6257 mutex_lock(&trace_types_lock);
6258
6259 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6260 /* make sure this cpu is enabled in the mask */
6261 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6262 ret = -EINVAL;
6263 goto out;
6264 }
6265 }
6266
6267 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6268 if (ret < 0)
6269 ret = -ENOMEM;
6270
6271 out:
6272 mutex_unlock(&trace_types_lock);
6273
6274 return ret;
6275 }
6276
6277
6278 /**
6279 * tracing_update_buffers - used by tracing facility to expand ring buffers
6280 *
6281 * To save memory when tracing is never used on a system that has it
6282 * configured in, the ring buffers are set to a minimum size. But once
6283 * a user starts to use the tracing facility, they need to grow
6284 * to their default size.
6285 *
6286 * This function is to be called when a tracer is about to be used.
6287 */
6288 int tracing_update_buffers(void)
6289 {
6290 int ret = 0;
6291
6292 mutex_lock(&trace_types_lock);
6293 if (!ring_buffer_expanded)
6294 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6295 RING_BUFFER_ALL_CPUS);
6296 mutex_unlock(&trace_types_lock);
6297
6298 return ret;
6299 }
6300
6301 struct trace_option_dentry;
6302
6303 static void
6304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6305
6306 /*
6307 * Used to clear out the tracer before deletion of an instance.
6308 * Must have trace_types_lock held.
6309 */
6310 static void tracing_set_nop(struct trace_array *tr)
6311 {
6312 if (tr->current_trace == &nop_trace)
6313 return;
6314
6315 tr->current_trace->enabled--;
6316
6317 if (tr->current_trace->reset)
6318 tr->current_trace->reset(tr);
6319
6320 tr->current_trace = &nop_trace;
6321 }
6322
6323 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6324 {
6325 /* Only enable if the directory has been created already. */
6326 if (!tr->dir)
6327 return;
6328
6329 create_trace_option_files(tr, t);
6330 }
6331
6332 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6333 {
6334 struct tracer *t;
6335 #ifdef CONFIG_TRACER_MAX_TRACE
6336 bool had_max_tr;
6337 #endif
6338 int ret = 0;
6339
6340 mutex_lock(&trace_types_lock);
6341
6342 if (!ring_buffer_expanded) {
6343 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6344 RING_BUFFER_ALL_CPUS);
6345 if (ret < 0)
6346 goto out;
6347 ret = 0;
6348 }
6349
6350 for (t = trace_types; t; t = t->next) {
6351 if (strcmp(t->name, buf) == 0)
6352 break;
6353 }
6354 if (!t) {
6355 ret = -EINVAL;
6356 goto out;
6357 }
6358 if (t == tr->current_trace)
6359 goto out;
6360
6361 #ifdef CONFIG_TRACER_SNAPSHOT
6362 if (t->use_max_tr) {
6363 arch_spin_lock(&tr->max_lock);
6364 if (tr->cond_snapshot)
6365 ret = -EBUSY;
6366 arch_spin_unlock(&tr->max_lock);
6367 if (ret)
6368 goto out;
6369 }
6370 #endif
6371 /* Some tracers won't work on kernel command line */
6372 if (system_state < SYSTEM_RUNNING && t->noboot) {
6373 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6374 t->name);
6375 goto out;
6376 }
6377
6378 /* Some tracers are only allowed for the top level buffer */
6379 if (!trace_ok_for_array(t, tr)) {
6380 ret = -EINVAL;
6381 goto out;
6382 }
6383
6384 /* If trace pipe files are being read, we can't change the tracer */
6385 if (tr->trace_ref) {
6386 ret = -EBUSY;
6387 goto out;
6388 }
6389
6390 trace_branch_disable();
6391
6392 tr->current_trace->enabled--;
6393
6394 if (tr->current_trace->reset)
6395 tr->current_trace->reset(tr);
6396
6397 /* Current trace needs to be nop_trace before synchronize_rcu */
6398 tr->current_trace = &nop_trace;
6399
6400 #ifdef CONFIG_TRACER_MAX_TRACE
6401 had_max_tr = tr->allocated_snapshot;
6402
6403 if (had_max_tr && !t->use_max_tr) {
6404 /*
6405 * We need to make sure that the update_max_tr sees that
6406 * current_trace changed to nop_trace to keep it from
6407 * swapping the buffers after we resize it.
6408 * The update_max_tr is called with interrupts disabled,
6409 * so a synchronize_rcu() is sufficient.
6410 */
6411 synchronize_rcu();
6412 free_snapshot(tr);
6413 }
6414 #endif
6415
6416 #ifdef CONFIG_TRACER_MAX_TRACE
6417 if (t->use_max_tr && !had_max_tr) {
6418 ret = tracing_alloc_snapshot_instance(tr);
6419 if (ret < 0)
6420 goto out;
6421 }
6422 #endif
6423
6424 if (t->init) {
6425 ret = tracer_init(t, tr);
6426 if (ret)
6427 goto out;
6428 }
6429
6430 tr->current_trace = t;
6431 tr->current_trace->enabled++;
6432 trace_branch_enable(tr);
6433 out:
6434 mutex_unlock(&trace_types_lock);
6435
6436 return ret;
6437 }
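/*
 * Illustrative usage (not part of the kernel source): tracing_set_tracer()
 * is reached by writing a registered tracer name to the "current_tracer"
 * file; "nop" selects the nop_trace used above to clear the tracer.
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 */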
6438
6439 static ssize_t
6440 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6441 size_t cnt, loff_t *ppos)
6442 {
6443 struct trace_array *tr = filp->private_data;
6444 char buf[MAX_TRACER_SIZE+1];
6445 int i;
6446 size_t ret;
6447 int err;
6448
6449 ret = cnt;
6450
6451 if (cnt > MAX_TRACER_SIZE)
6452 cnt = MAX_TRACER_SIZE;
6453
6454 if (copy_from_user(buf, ubuf, cnt))
6455 return -EFAULT;
6456
6457 buf[cnt] = 0;
6458
6459 /* strip ending whitespace. */
6460 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6461 buf[i] = 0;
6462
6463 err = tracing_set_tracer(tr, buf);
6464 if (err)
6465 return err;
6466
6467 *ppos += ret;
6468
6469 return ret;
6470 }
6471
6472 static ssize_t
6473 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6474 size_t cnt, loff_t *ppos)
6475 {
6476 char buf[64];
6477 int r;
6478
6479 r = snprintf(buf, sizeof(buf), "%ld\n",
6480 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6481 if (r > sizeof(buf))
6482 r = sizeof(buf);
6483 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6484 }
6485
6486 static ssize_t
6487 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6488 size_t cnt, loff_t *ppos)
6489 {
6490 unsigned long val;
6491 int ret;
6492
6493 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6494 if (ret)
6495 return ret;
6496
6497 *ptr = val * 1000;
6498
6499 return cnt;
6500 }
6501
6502 static ssize_t
6503 tracing_thresh_read(struct file *filp, char __user *ubuf,
6504 size_t cnt, loff_t *ppos)
6505 {
6506 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6507 }
6508
6509 static ssize_t
6510 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6511 size_t cnt, loff_t *ppos)
6512 {
6513 struct trace_array *tr = filp->private_data;
6514 int ret;
6515
6516 mutex_lock(&trace_types_lock);
6517 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6518 if (ret < 0)
6519 goto out;
6520
6521 if (tr->current_trace->update_thresh) {
6522 ret = tr->current_trace->update_thresh(tr);
6523 if (ret < 0)
6524 goto out;
6525 }
6526
6527 ret = cnt;
6528 out:
6529 mutex_unlock(&trace_types_lock);
6530
6531 return ret;
6532 }
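/*
 * Illustrative usage (not part of the kernel source): tracing_thresh is
 * written and read in microseconds; tracing_nsecs_write() above stores
 * the value internally in nanoseconds (val * 1000). For example, to only
 * record latencies above 100 usecs:
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 */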
6533
6534 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6535
6536 static ssize_t
6537 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6538 size_t cnt, loff_t *ppos)
6539 {
6540 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6541 }
6542
6543 static ssize_t
6544 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6545 size_t cnt, loff_t *ppos)
6546 {
6547 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6548 }
6549
6550 #endif
6551
6552 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6553 {
6554 struct trace_array *tr = inode->i_private;
6555 struct trace_iterator *iter;
6556 int ret;
6557
6558 ret = tracing_check_open_get_tr(tr);
6559 if (ret)
6560 return ret;
6561
6562 mutex_lock(&trace_types_lock);
6563
6564 /* create a buffer to store the information to pass to userspace */
6565 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6566 if (!iter) {
6567 ret = -ENOMEM;
6568 __trace_array_put(tr);
6569 goto out;
6570 }
6571
6572 trace_seq_init(&iter->seq);
6573 iter->trace = tr->current_trace;
6574
6575 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6576 ret = -ENOMEM;
6577 goto fail;
6578 }
6579
6580 /* trace pipe does not show start of buffer */
6581 cpumask_setall(iter->started);
6582
6583 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6584 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6585
6586 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6587 if (trace_clocks[tr->clock_id].in_ns)
6588 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6589
6590 iter->tr = tr;
6591 iter->array_buffer = &tr->array_buffer;
6592 iter->cpu_file = tracing_get_cpu(inode);
6593 mutex_init(&iter->mutex);
6594 filp->private_data = iter;
6595
6596 if (iter->trace->pipe_open)
6597 iter->trace->pipe_open(iter);
6598
6599 nonseekable_open(inode, filp);
6600
6601 tr->trace_ref++;
6602 out:
6603 mutex_unlock(&trace_types_lock);
6604 return ret;
6605
6606 fail:
6607 kfree(iter);
6608 __trace_array_put(tr);
6609 mutex_unlock(&trace_types_lock);
6610 return ret;
6611 }
6612
6613 static int tracing_release_pipe(struct inode *inode, struct file *file)
6614 {
6615 struct trace_iterator *iter = file->private_data;
6616 struct trace_array *tr = inode->i_private;
6617
6618 mutex_lock(&trace_types_lock);
6619
6620 tr->trace_ref--;
6621
6622 if (iter->trace->pipe_close)
6623 iter->trace->pipe_close(iter);
6624
6625 mutex_unlock(&trace_types_lock);
6626
6627 free_cpumask_var(iter->started);
6628 mutex_destroy(&iter->mutex);
6629 kfree(iter);
6630
6631 trace_array_put(tr);
6632
6633 return 0;
6634 }
6635
6636 static __poll_t
6637 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6638 {
6639 struct trace_array *tr = iter->tr;
6640
6641 /* Iterators are static, they should be filled or empty */
6642 if (trace_buffer_iter(iter, iter->cpu_file))
6643 return EPOLLIN | EPOLLRDNORM;
6644
6645 if (tr->trace_flags & TRACE_ITER_BLOCK)
6646 /*
6647 * Always select as readable when in blocking mode
6648 */
6649 return EPOLLIN | EPOLLRDNORM;
6650 else
6651 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6652 filp, poll_table);
6653 }
6654
6655 static __poll_t
6656 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6657 {
6658 struct trace_iterator *iter = filp->private_data;
6659
6660 return trace_poll(iter, filp, poll_table);
6661 }
6662
6663 /* Must be called with iter->mutex held. */
6664 static int tracing_wait_pipe(struct file *filp)
6665 {
6666 struct trace_iterator *iter = filp->private_data;
6667 int ret;
6668
6669 while (trace_empty(iter)) {
6670
6671 if ((filp->f_flags & O_NONBLOCK)) {
6672 return -EAGAIN;
6673 }
6674
6675 /*
6676 * We block until we read something and tracing is disabled.
6677 * We still block if tracing is disabled, but we have never
6678 * read anything. This allows a user to cat this file, and
6679 * then enable tracing. But after we have read something,
6680 * we give an EOF when tracing is again disabled.
6681 *
6682 * iter->pos will be 0 if we haven't read anything.
6683 */
6684 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6685 break;
6686
6687 mutex_unlock(&iter->mutex);
6688
6689 ret = wait_on_pipe(iter, 0);
6690
6691 mutex_lock(&iter->mutex);
6692
6693 if (ret)
6694 return ret;
6695 }
6696
6697 return 1;
6698 }
6699
6700 /*
6701 * Consumer reader.
6702 */
6703 static ssize_t
6704 tracing_read_pipe(struct file *filp, char __user *ubuf,
6705 size_t cnt, loff_t *ppos)
6706 {
6707 struct trace_iterator *iter = filp->private_data;
6708 ssize_t sret;
6709
6710 /*
6711 * Avoid more than one consumer on a single file descriptor
6712 * This is just a matter of traces coherency, the ring buffer itself
6713 * is protected.
6714 */
6715 mutex_lock(&iter->mutex);
6716
6717 /* return any leftover data */
6718 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6719 if (sret != -EBUSY)
6720 goto out;
6721
6722 trace_seq_init(&iter->seq);
6723
6724 if (iter->trace->read) {
6725 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6726 if (sret)
6727 goto out;
6728 }
6729
6730 waitagain:
6731 sret = tracing_wait_pipe(filp);
6732 if (sret <= 0)
6733 goto out;
6734
6735 /* stop when tracing is finished */
6736 if (trace_empty(iter)) {
6737 sret = 0;
6738 goto out;
6739 }
6740
6741 if (cnt >= PAGE_SIZE)
6742 cnt = PAGE_SIZE - 1;
6743
6744 /* reset all but tr, trace, and overruns */
6745 memset(&iter->seq, 0,
6746 sizeof(struct trace_iterator) -
6747 offsetof(struct trace_iterator, seq));
6748 cpumask_clear(iter->started);
6749 trace_seq_init(&iter->seq);
6750 iter->pos = -1;
6751
6752 trace_event_read_lock();
6753 trace_access_lock(iter->cpu_file);
6754 while (trace_find_next_entry_inc(iter) != NULL) {
6755 enum print_line_t ret;
6756 int save_len = iter->seq.seq.len;
6757
6758 ret = print_trace_line(iter);
6759 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6760 /* don't print partial lines */
6761 iter->seq.seq.len = save_len;
6762 break;
6763 }
6764 if (ret != TRACE_TYPE_NO_CONSUME)
6765 trace_consume(iter);
6766
6767 if (trace_seq_used(&iter->seq) >= cnt)
6768 break;
6769
6770 /*
6771 * Setting the full flag means we reached the trace_seq buffer
6772 * size and we should leave by partial output condition above.
6773 * One of the trace_seq_* functions is not used properly.
6774 */
6775 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6776 iter->ent->type);
6777 }
6778 trace_access_unlock(iter->cpu_file);
6779 trace_event_read_unlock();
6780
6781 /* Now copy what we have to the user */
6782 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6783 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6784 trace_seq_init(&iter->seq);
6785
6786 /*
6787 * If there was nothing to send to user, in spite of consuming trace
6788 * entries, go back to wait for more entries.
6789 */
6790 if (sret == -EBUSY)
6791 goto waitagain;
6792
6793 out:
6794 mutex_unlock(&iter->mutex);
6795
6796 return sret;
6797 }
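/*
 * Illustrative usage (not part of the kernel source): tracing_read_pipe()
 * implements the consuming read typically exposed as the "trace_pipe"
 * file. Unlike "trace", a read removes entries from the ring buffer and
 * blocks for new data (unless the file was opened with O_NONBLOCK, in
 * which case tracing_wait_pipe() returns -EAGAIN):
 *
 *   cat /sys/kernel/tracing/trace_pipe
 */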
6798
6799 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6800 unsigned int idx)
6801 {
6802 __free_page(spd->pages[idx]);
6803 }
6804
6805 static size_t
6806 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6807 {
6808 size_t count;
6809 int save_len;
6810 int ret;
6811
6812 /* Seq buffer is page-sized, exactly what we need. */
6813 for (;;) {
6814 save_len = iter->seq.seq.len;
6815 ret = print_trace_line(iter);
6816
6817 if (trace_seq_has_overflowed(&iter->seq)) {
6818 iter->seq.seq.len = save_len;
6819 break;
6820 }
6821
6822 /*
6823 * This should not be hit, because it should only
6824 * be set if the iter->seq overflowed. But check it
6825 * anyway to be safe.
6826 */
6827 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6828 iter->seq.seq.len = save_len;
6829 break;
6830 }
6831
6832 count = trace_seq_used(&iter->seq) - save_len;
6833 if (rem < count) {
6834 rem = 0;
6835 iter->seq.seq.len = save_len;
6836 break;
6837 }
6838
6839 if (ret != TRACE_TYPE_NO_CONSUME)
6840 trace_consume(iter);
6841 rem -= count;
6842 if (!trace_find_next_entry_inc(iter)) {
6843 rem = 0;
6844 iter->ent = NULL;
6845 break;
6846 }
6847 }
6848
6849 return rem;
6850 }
6851
6852 static ssize_t tracing_splice_read_pipe(struct file *filp,
6853 loff_t *ppos,
6854 struct pipe_inode_info *pipe,
6855 size_t len,
6856 unsigned int flags)
6857 {
6858 struct page *pages_def[PIPE_DEF_BUFFERS];
6859 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6860 struct trace_iterator *iter = filp->private_data;
6861 struct splice_pipe_desc spd = {
6862 .pages = pages_def,
6863 .partial = partial_def,
6864 .nr_pages = 0, /* This gets updated below. */
6865 .nr_pages_max = PIPE_DEF_BUFFERS,
6866 .ops = &default_pipe_buf_ops,
6867 .spd_release = tracing_spd_release_pipe,
6868 };
6869 ssize_t ret;
6870 size_t rem;
6871 unsigned int i;
6872
6873 if (splice_grow_spd(pipe, &spd))
6874 return -ENOMEM;
6875
6876 mutex_lock(&iter->mutex);
6877
6878 if (iter->trace->splice_read) {
6879 ret = iter->trace->splice_read(iter, filp,
6880 ppos, pipe, len, flags);
6881 if (ret)
6882 goto out_err;
6883 }
6884
6885 ret = tracing_wait_pipe(filp);
6886 if (ret <= 0)
6887 goto out_err;
6888
6889 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6890 ret = -EFAULT;
6891 goto out_err;
6892 }
6893
6894 trace_event_read_lock();
6895 trace_access_lock(iter->cpu_file);
6896
6897 /* Fill as many pages as possible. */
6898 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6899 spd.pages[i] = alloc_page(GFP_KERNEL);
6900 if (!spd.pages[i])
6901 break;
6902
6903 rem = tracing_fill_pipe_page(rem, iter);
6904
6905 /* Copy the data into the page, so we can start over. */
6906 ret = trace_seq_to_buffer(&iter->seq,
6907 page_address(spd.pages[i]),
6908 trace_seq_used(&iter->seq));
6909 if (ret < 0) {
6910 __free_page(spd.pages[i]);
6911 break;
6912 }
6913 spd.partial[i].offset = 0;
6914 spd.partial[i].len = trace_seq_used(&iter->seq);
6915
6916 trace_seq_init(&iter->seq);
6917 }
6918
6919 trace_access_unlock(iter->cpu_file);
6920 trace_event_read_unlock();
6921 mutex_unlock(&iter->mutex);
6922
6923 spd.nr_pages = i;
6924
6925 if (i)
6926 ret = splice_to_pipe(pipe, &spd);
6927 else
6928 ret = 0;
6929 out:
6930 splice_shrink_spd(&spd);
6931 return ret;
6932
6933 out_err:
6934 mutex_unlock(&iter->mutex);
6935 goto out;
6936 }
6937
6938 static ssize_t
6939 tracing_entries_read(struct file *filp, char __user *ubuf,
6940 size_t cnt, loff_t *ppos)
6941 {
6942 struct inode *inode = file_inode(filp);
6943 struct trace_array *tr = inode->i_private;
6944 int cpu = tracing_get_cpu(inode);
6945 char buf[64];
6946 int r = 0;
6947 ssize_t ret;
6948
6949 mutex_lock(&trace_types_lock);
6950
6951 if (cpu == RING_BUFFER_ALL_CPUS) {
6952 int cpu, buf_size_same;
6953 unsigned long size;
6954
6955 size = 0;
6956 buf_size_same = 1;
6957 /* check if all cpu sizes are same */
6958 for_each_tracing_cpu(cpu) {
6959 /* fill in the size from first enabled cpu */
6960 if (size == 0)
6961 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6962 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6963 buf_size_same = 0;
6964 break;
6965 }
6966 }
6967
6968 if (buf_size_same) {
6969 if (!ring_buffer_expanded)
6970 r = sprintf(buf, "%lu (expanded: %lu)\n",
6971 size >> 10,
6972 trace_buf_size >> 10);
6973 else
6974 r = sprintf(buf, "%lu\n", size >> 10);
6975 } else
6976 r = sprintf(buf, "X\n");
6977 } else
6978 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6979
6980 mutex_unlock(&trace_types_lock);
6981
6982 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6983 return ret;
6984 }
6985
6986 static ssize_t
6987 tracing_entries_write(struct file *filp, const char __user *ubuf,
6988 size_t cnt, loff_t *ppos)
6989 {
6990 struct inode *inode = file_inode(filp);
6991 struct trace_array *tr = inode->i_private;
6992 unsigned long val;
6993 int ret;
6994
6995 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6996 if (ret)
6997 return ret;
6998
6999 /* must have at least 1 entry */
7000 if (!val)
7001 return -EINVAL;
7002
7003 /* value is in KB */
7004 val <<= 10;
7005 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7006 if (ret < 0)
7007 return ret;
7008
7009 *ppos += cnt;
7010
7011 return cnt;
7012 }
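/*
 * Illustrative usage (not part of the kernel source): the entries files
 * are typically exposed as "buffer_size_kb" (all CPUs) and
 * "per_cpu/cpuN/buffer_size_kb". The value written is in KB per CPU, as
 * the write handler above shifts it left by 10 before resizing:
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   cat /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */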
7013
7014 static ssize_t
7015 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7016 size_t cnt, loff_t *ppos)
7017 {
7018 struct trace_array *tr = filp->private_data;
7019 char buf[64];
7020 int r, cpu;
7021 unsigned long size = 0, expanded_size = 0;
7022
7023 mutex_lock(&trace_types_lock);
7024 for_each_tracing_cpu(cpu) {
7025 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7026 if (!ring_buffer_expanded)
7027 expanded_size += trace_buf_size >> 10;
7028 }
7029 if (ring_buffer_expanded)
7030 r = sprintf(buf, "%lu\n", size);
7031 else
7032 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7033 mutex_unlock(&trace_types_lock);
7034
7035 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7036 }
7037
7038 static ssize_t
7039 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7040 size_t cnt, loff_t *ppos)
7041 {
7042 /*
7043 * There is no need to read what the user has written; this function
7044 * exists just to make sure that there is no error when "echo" is used
7045 */
7046
7047 *ppos += cnt;
7048
7049 return cnt;
7050 }
7051
7052 static int
7053 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7054 {
7055 struct trace_array *tr = inode->i_private;
7056
7057 /* disable tracing ? */
7058 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7059 tracer_tracing_off(tr);
7060 /* resize the ring buffer to 0 */
7061 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7062
7063 trace_array_put(tr);
7064
7065 return 0;
7066 }
7067
7068 static ssize_t
7069 tracing_mark_write(struct file *filp, const char __user *ubuf,
7070 size_t cnt, loff_t *fpos)
7071 {
7072 struct trace_array *tr = filp->private_data;
7073 struct ring_buffer_event *event;
7074 enum event_trigger_type tt = ETT_NONE;
7075 struct trace_buffer *buffer;
7076 struct print_entry *entry;
7077 ssize_t written;
7078 int size;
7079 int len;
7080
7081 /* Used in tracing_mark_raw_write() as well */
7082 #define FAULTED_STR "<faulted>"
7083 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7084
7085 if (tracing_disabled)
7086 return -EINVAL;
7087
7088 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7089 return -EINVAL;
7090
7091 if (cnt > TRACE_BUF_SIZE)
7092 cnt = TRACE_BUF_SIZE;
7093
7094 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7095
7096 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7097
7098 /* If less than "<faulted>", then make sure we can still add that */
7099 if (cnt < FAULTED_SIZE)
7100 size += FAULTED_SIZE - cnt;
7101
7102 buffer = tr->array_buffer.buffer;
7103 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7104 tracing_gen_ctx());
7105 if (unlikely(!event))
7106 /* Ring buffer disabled, return as if not open for write */
7107 return -EBADF;
7108
7109 entry = ring_buffer_event_data(event);
7110 entry->ip = _THIS_IP_;
7111
7112 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7113 if (len) {
7114 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7115 cnt = FAULTED_SIZE;
7116 written = -EFAULT;
7117 } else
7118 written = cnt;
7119
7120 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7121 /* do not add \n before testing triggers, but add \0 */
7122 entry->buf[cnt] = '\0';
7123 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7124 }
7125
7126 if (entry->buf[cnt - 1] != '\n') {
7127 entry->buf[cnt] = '\n';
7128 entry->buf[cnt + 1] = '\0';
7129 } else
7130 entry->buf[cnt] = '\0';
7131
7132 if (static_branch_unlikely(&trace_marker_exports_enabled))
7133 ftrace_exports(event, TRACE_EXPORT_MARKER);
7134 __buffer_unlock_commit(buffer, event);
7135
7136 if (tt)
7137 event_triggers_post_call(tr->trace_marker_file, tt);
7138
7139 if (written > 0)
7140 *fpos += written;
7141
7142 return written;
7143 }
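/*
 * Illustrative usage (not part of the kernel source): tracing_mark_write()
 * backs the "trace_marker" file, letting user space inject free-form text
 * into the trace as TRACE_PRINT events (a trailing newline is added if
 * missing, as handled above):
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */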
7144
7145 /* Limit it for now to 3K (including tag) */
7146 #define RAW_DATA_MAX_SIZE (1024*3)
7147
7148 static ssize_t
7149 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7150 size_t cnt, loff_t *fpos)
7151 {
7152 struct trace_array *tr = filp->private_data;
7153 struct ring_buffer_event *event;
7154 struct trace_buffer *buffer;
7155 struct raw_data_entry *entry;
7156 ssize_t written;
7157 int size;
7158 int len;
7159
7160 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7161
7162 if (tracing_disabled)
7163 return -EINVAL;
7164
7165 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7166 return -EINVAL;
7167
7168 /* The marker must at least have a tag id */
7169 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7170 return -EINVAL;
7171
7172 if (cnt > TRACE_BUF_SIZE)
7173 cnt = TRACE_BUF_SIZE;
7174
7175 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7176
7177 size = sizeof(*entry) + cnt;
7178 if (cnt < FAULT_SIZE_ID)
7179 size += FAULT_SIZE_ID - cnt;
7180
7181 buffer = tr->array_buffer.buffer;
7182 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7183 tracing_gen_ctx());
7184 if (!event)
7185 /* Ring buffer disabled, return as if not open for write */
7186 return -EBADF;
7187
7188 entry = ring_buffer_event_data(event);
7189
7190 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7191 if (len) {
7192 entry->id = -1;
7193 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7194 written = -EFAULT;
7195 } else
7196 written = cnt;
7197
7198 __buffer_unlock_commit(buffer, event);
7199
7200 if (written > 0)
7201 *fpos += written;
7202
7203 return written;
7204 }
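/*
 * Illustrative user-space sketch (not part of the kernel source) for the
 * "trace_marker_raw" file handled above: the payload must begin with a
 * 4-byte tag id (the "unsigned int" minimum checked against cnt above),
 * followed by arbitrary binary data, up to RAW_DATA_MAX_SIZE bytes total:
 *
 *   struct { unsigned int id; char payload[32]; } raw = { .id = 42 };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &raw, sizeof(raw));
 *   close(fd);
 */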
7205
7206 static int tracing_clock_show(struct seq_file *m, void *v)
7207 {
7208 struct trace_array *tr = m->private;
7209 int i;
7210
7211 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7212 seq_printf(m,
7213 "%s%s%s%s", i ? " " : "",
7214 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7215 i == tr->clock_id ? "]" : "");
7216 seq_putc(m, '\n');
7217
7218 return 0;
7219 }
7220
7221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7222 {
7223 int i;
7224
7225 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7226 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7227 break;
7228 }
7229 if (i == ARRAY_SIZE(trace_clocks))
7230 return -EINVAL;
7231
7232 mutex_lock(&trace_types_lock);
7233
7234 tr->clock_id = i;
7235
7236 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7237
7238 /*
7239 * New clock may not be consistent with the previous clock.
7240 * Reset the buffer so that it doesn't have incomparable timestamps.
7241 */
7242 tracing_reset_online_cpus(&tr->array_buffer);
7243
7244 #ifdef CONFIG_TRACER_MAX_TRACE
7245 if (tr->max_buffer.buffer)
7246 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7247 tracing_reset_online_cpus(&tr->max_buffer);
7248 #endif
7249
7250 mutex_unlock(&trace_types_lock);
7251
7252 return 0;
7253 }
7254
7255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7256 size_t cnt, loff_t *fpos)
7257 {
7258 struct seq_file *m = filp->private_data;
7259 struct trace_array *tr = m->private;
7260 char buf[64];
7261 const char *clockstr;
7262 int ret;
7263
7264 if (cnt >= sizeof(buf))
7265 return -EINVAL;
7266
7267 if (copy_from_user(buf, ubuf, cnt))
7268 return -EFAULT;
7269
7270 buf[cnt] = 0;
7271
7272 clockstr = strstrip(buf);
7273
7274 ret = tracing_set_clock(tr, clockstr);
7275 if (ret)
7276 return ret;
7277
7278 *fpos += cnt;
7279
7280 return cnt;
7281 }
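/*
 * Illustrative usage (not part of the kernel source): tracing_clock_show()
 * brackets the currently selected clock, and writing one of the listed
 * names (e.g. "mono", if it appears in the list) switches it, which also
 * resets the buffers per the comment in tracing_set_clock() above:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo mono > /sys/kernel/tracing/trace_clock
 */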
7282
7283 static int tracing_clock_open(struct inode *inode, struct file *file)
7284 {
7285 struct trace_array *tr = inode->i_private;
7286 int ret;
7287
7288 ret = tracing_check_open_get_tr(tr);
7289 if (ret)
7290 return ret;
7291
7292 ret = single_open(file, tracing_clock_show, inode->i_private);
7293 if (ret < 0)
7294 trace_array_put(tr);
7295
7296 return ret;
7297 }
7298
7299 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7300 {
7301 struct trace_array *tr = m->private;
7302
7303 mutex_lock(&trace_types_lock);
7304
7305 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7306 seq_puts(m, "delta [absolute]\n");
7307 else
7308 seq_puts(m, "[delta] absolute\n");
7309
7310 mutex_unlock(&trace_types_lock);
7311
7312 return 0;
7313 }
7314
7315 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7316 {
7317 struct trace_array *tr = inode->i_private;
7318 int ret;
7319
7320 ret = tracing_check_open_get_tr(tr);
7321 if (ret)
7322 return ret;
7323
7324 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7325 if (ret < 0)
7326 trace_array_put(tr);
7327
7328 return ret;
7329 }
7330
7331 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7332 {
7333 if (rbe == this_cpu_read(trace_buffered_event))
7334 return ring_buffer_time_stamp(buffer);
7335
7336 return ring_buffer_event_time_stamp(buffer, rbe);
7337 }
7338
7339 /*
7340 * Set or disable using the per CPU trace_buffered_event when possible.
7341 */
7342 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7343 {
7344 int ret = 0;
7345
7346 mutex_lock(&trace_types_lock);
7347
7348 if (set && tr->no_filter_buffering_ref++)
7349 goto out;
7350
7351 if (!set) {
7352 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7353 ret = -EINVAL;
7354 goto out;
7355 }
7356
7357 --tr->no_filter_buffering_ref;
7358 }
7359 out:
7360 mutex_unlock(&trace_types_lock);
7361
7362 return ret;
7363 }
7364
7365 struct ftrace_buffer_info {
7366 struct trace_iterator iter;
7367 void *spare;
7368 unsigned int spare_cpu;
7369 unsigned int read;
7370 };
7371
7372 #ifdef CONFIG_TRACER_SNAPSHOT
7373 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7374 {
7375 struct trace_array *tr = inode->i_private;
7376 struct trace_iterator *iter;
7377 struct seq_file *m;
7378 int ret;
7379
7380 ret = tracing_check_open_get_tr(tr);
7381 if (ret)
7382 return ret;
7383
7384 if (file->f_mode & FMODE_READ) {
7385 iter = __tracing_open(inode, file, true);
7386 if (IS_ERR(iter))
7387 ret = PTR_ERR(iter);
7388 } else {
7389 /* Writes still need the seq_file to hold the private data */
7390 ret = -ENOMEM;
7391 m = kzalloc(sizeof(*m), GFP_KERNEL);
7392 if (!m)
7393 goto out;
7394 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7395 if (!iter) {
7396 kfree(m);
7397 goto out;
7398 }
7399 ret = 0;
7400
7401 iter->tr = tr;
7402 iter->array_buffer = &tr->max_buffer;
7403 iter->cpu_file = tracing_get_cpu(inode);
7404 m->private = iter;
7405 file->private_data = m;
7406 }
7407 out:
7408 if (ret < 0)
7409 trace_array_put(tr);
7410
7411 return ret;
7412 }
7413
7414 static ssize_t
7415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7416 loff_t *ppos)
7417 {
7418 struct seq_file *m = filp->private_data;
7419 struct trace_iterator *iter = m->private;
7420 struct trace_array *tr = iter->tr;
7421 unsigned long val;
7422 int ret;
7423
7424 ret = tracing_update_buffers();
7425 if (ret < 0)
7426 return ret;
7427
7428 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7429 if (ret)
7430 return ret;
7431
7432 mutex_lock(&trace_types_lock);
7433
7434 if (tr->current_trace->use_max_tr) {
7435 ret = -EBUSY;
7436 goto out;
7437 }
7438
7439 arch_spin_lock(&tr->max_lock);
7440 if (tr->cond_snapshot)
7441 ret = -EBUSY;
7442 arch_spin_unlock(&tr->max_lock);
7443 if (ret)
7444 goto out;
7445
7446 switch (val) {
7447 case 0:
7448 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7449 ret = -EINVAL;
7450 break;
7451 }
7452 if (tr->allocated_snapshot)
7453 free_snapshot(tr);
7454 break;
7455 case 1:
7456 /* Only allow per-cpu swap if the ring buffer supports it */
7457 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7458 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7459 ret = -EINVAL;
7460 break;
7461 }
7462 #endif
7463 if (tr->allocated_snapshot)
7464 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7465 &tr->array_buffer, iter->cpu_file);
7466 else
7467 ret = tracing_alloc_snapshot_instance(tr);
7468 if (ret < 0)
7469 break;
7470 local_irq_disable();
7471 /* Now, we're going to swap */
7472 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7473 update_max_tr(tr, current, smp_processor_id(), NULL);
7474 else
7475 update_max_tr_single(tr, current, iter->cpu_file);
7476 local_irq_enable();
7477 break;
7478 default:
7479 if (tr->allocated_snapshot) {
7480 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7481 tracing_reset_online_cpus(&tr->max_buffer);
7482 else
7483 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7484 }
7485 break;
7486 }
7487
7488 if (ret >= 0) {
7489 *ppos += cnt;
7490 ret = cnt;
7491 }
7492 out:
7493 mutex_unlock(&trace_types_lock);
7494 return ret;
7495 }
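/*
 * Illustrative usage (not part of the kernel source), matching the switch
 * in tracing_snapshot_write() above: writing 1 to the "snapshot" file
 * allocates the snapshot buffer if needed and takes a snapshot, 0 frees
 * the snapshot buffer, and any other value clears its contents:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot
 *   cat /sys/kernel/tracing/snapshot
 *   echo 0 > /sys/kernel/tracing/snapshot
 */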
7496
7497 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7498 {
7499 struct seq_file *m = file->private_data;
7500 int ret;
7501
7502 ret = tracing_release(inode, file);
7503
7504 if (file->f_mode & FMODE_READ)
7505 return ret;
7506
7507 /* If write only, the seq_file is just a stub */
7508 if (m)
7509 kfree(m->private);
7510 kfree(m);
7511
7512 return 0;
7513 }
7514
7515 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7516 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7517 size_t count, loff_t *ppos);
7518 static int tracing_buffers_release(struct inode *inode, struct file *file);
7519 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7520 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7521
7522 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7523 {
7524 struct ftrace_buffer_info *info;
7525 int ret;
7526
7527 /* The following checks for tracefs lockdown */
7528 ret = tracing_buffers_open(inode, filp);
7529 if (ret < 0)
7530 return ret;
7531
7532 info = filp->private_data;
7533
7534 if (info->iter.trace->use_max_tr) {
7535 tracing_buffers_release(inode, filp);
7536 return -EBUSY;
7537 }
7538
7539 info->iter.snapshot = true;
7540 info->iter.array_buffer = &info->iter.tr->max_buffer;
7541
7542 return ret;
7543 }
7544
7545 #endif /* CONFIG_TRACER_SNAPSHOT */
7546
7547
7548 static const struct file_operations tracing_thresh_fops = {
7549 .open = tracing_open_generic,
7550 .read = tracing_thresh_read,
7551 .write = tracing_thresh_write,
7552 .llseek = generic_file_llseek,
7553 };
7554
7555 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7556 static const struct file_operations tracing_max_lat_fops = {
7557 .open = tracing_open_generic,
7558 .read = tracing_max_lat_read,
7559 .write = tracing_max_lat_write,
7560 .llseek = generic_file_llseek,
7561 };
7562 #endif
7563
7564 static const struct file_operations set_tracer_fops = {
7565 .open = tracing_open_generic,
7566 .read = tracing_set_trace_read,
7567 .write = tracing_set_trace_write,
7568 .llseek = generic_file_llseek,
7569 };
7570
7571 static const struct file_operations tracing_pipe_fops = {
7572 .open = tracing_open_pipe,
7573 .poll = tracing_poll_pipe,
7574 .read = tracing_read_pipe,
7575 .splice_read = tracing_splice_read_pipe,
7576 .release = tracing_release_pipe,
7577 .llseek = no_llseek,
7578 };
7579
7580 static const struct file_operations tracing_entries_fops = {
7581 .open = tracing_open_generic_tr,
7582 .read = tracing_entries_read,
7583 .write = tracing_entries_write,
7584 .llseek = generic_file_llseek,
7585 .release = tracing_release_generic_tr,
7586 };
7587
7588 static const struct file_operations tracing_total_entries_fops = {
7589 .open = tracing_open_generic_tr,
7590 .read = tracing_total_entries_read,
7591 .llseek = generic_file_llseek,
7592 .release = tracing_release_generic_tr,
7593 };
7594
7595 static const struct file_operations tracing_free_buffer_fops = {
7596 .open = tracing_open_generic_tr,
7597 .write = tracing_free_buffer_write,
7598 .release = tracing_free_buffer_release,
7599 };
7600
7601 static const struct file_operations tracing_mark_fops = {
7602 .open = tracing_open_generic_tr,
7603 .write = tracing_mark_write,
7604 .llseek = generic_file_llseek,
7605 .release = tracing_release_generic_tr,
7606 };
7607
7608 static const struct file_operations tracing_mark_raw_fops = {
7609 .open = tracing_open_generic_tr,
7610 .write = tracing_mark_raw_write,
7611 .llseek = generic_file_llseek,
7612 .release = tracing_release_generic_tr,
7613 };
7614
7615 static const struct file_operations trace_clock_fops = {
7616 .open = tracing_clock_open,
7617 .read = seq_read,
7618 .llseek = seq_lseek,
7619 .release = tracing_single_release_tr,
7620 .write = tracing_clock_write,
7621 };
7622
7623 static const struct file_operations trace_time_stamp_mode_fops = {
7624 .open = tracing_time_stamp_mode_open,
7625 .read = seq_read,
7626 .llseek = seq_lseek,
7627 .release = tracing_single_release_tr,
7628 };
7629
7630 #ifdef CONFIG_TRACER_SNAPSHOT
7631 static const struct file_operations snapshot_fops = {
7632 .open = tracing_snapshot_open,
7633 .read = seq_read,
7634 .write = tracing_snapshot_write,
7635 .llseek = tracing_lseek,
7636 .release = tracing_snapshot_release,
7637 };
7638
7639 static const struct file_operations snapshot_raw_fops = {
7640 .open = snapshot_raw_open,
7641 .read = tracing_buffers_read,
7642 .release = tracing_buffers_release,
7643 .splice_read = tracing_buffers_splice_read,
7644 .llseek = no_llseek,
7645 };
7646
7647 #endif /* CONFIG_TRACER_SNAPSHOT */
7648
7649 /*
7650 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7651 * @filp: The active open file structure
7652 * @ubuf: The userspace provided buffer holding the value to write
7653 * @cnt: The maximum number of bytes to write
7654 * @ppos: The current "file" position
7655 *
7656 * This function implements the write interface for a struct trace_min_max_param.
7657 * The filp->private_data must point to a trace_min_max_param structure that
7658 * defines where to write the value, the min and the max acceptable values,
7659 * and a lock to protect the write.
7660 */
7661 static ssize_t
7662 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7663 {
7664 struct trace_min_max_param *param = filp->private_data;
7665 u64 val;
7666 int err;
7667
7668 if (!param)
7669 return -EFAULT;
7670
7671 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7672 if (err)
7673 return err;
7674
7675 if (param->lock)
7676 mutex_lock(param->lock);
7677
7678 if (param->min && val < *param->min)
7679 err = -EINVAL;
7680
7681 if (param->max && val > *param->max)
7682 err = -EINVAL;
7683
7684 if (!err)
7685 *param->val = val;
7686
7687 if (param->lock)
7688 mutex_unlock(param->lock);
7689
7690 if (err)
7691 return err;
7692
7693 return cnt;
7694 }
7695
7696 /*
7697 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7698 * @filp: The active open file structure
7699 * @ubuf: The userspace provided buffer to read value into
7700 * @cnt: The maximum number of bytes to read
7701 * @ppos: The current "file" position
7702 *
7703 * This function implements the read interface for a struct trace_min_max_param.
7704 * The filp->private_data must point to a trace_min_max_param struct with valid
7705 * data.
7706 */
7707 static ssize_t
7708 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7709 {
7710 struct trace_min_max_param *param = filp->private_data;
7711 char buf[U64_STR_SIZE];
7712 int len;
7713 u64 val;
7714
7715 if (!param)
7716 return -EFAULT;
7717
7718 val = *param->val;
7719
7720 if (cnt > sizeof(buf))
7721 cnt = sizeof(buf);
7722
7723 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7724
7725 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7726 }
7727
7728 const struct file_operations trace_min_max_fops = {
7729 .open = tracing_open_generic,
7730 .read = trace_min_max_read,
7731 .write = trace_min_max_write,
7732 };
7733
7734 #define TRACING_LOG_ERRS_MAX 8
7735 #define TRACING_LOG_LOC_MAX 128
7736
7737 #define CMD_PREFIX " Command: "
7738
7739 struct err_info {
7740 const char **errs; /* ptr to loc-specific array of err strings */
7741 u8 type; /* index into errs -> specific err string */
7742 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7743 u64 ts;
7744 };
7745
7746 struct tracing_log_err {
7747 struct list_head list;
7748 struct err_info info;
7749 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7750 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7751 };
7752
7753 static DEFINE_MUTEX(tracing_err_log_lock);
7754
7755 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7756 {
7757 struct tracing_log_err *err;
7758
7759 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7760 err = kzalloc(sizeof(*err), GFP_KERNEL);
7761 if (!err)
7762 err = ERR_PTR(-ENOMEM);
7763 else
7764 tr->n_err_log_entries++;
7765
7766 return err;
7767 }
7768
7769 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7770 list_del(&err->list);
7771
7772 return err;
7773 }
7774
7775 /**
7776 * err_pos - find the position of a string within a command for error careting
7777 * @cmd: The tracing command that caused the error
7778 * @str: The string to position the caret at within @cmd
7779 *
7780 * Finds the position of the first occurrence of @str within @cmd. The
7781 * return value can be passed to tracing_log_err() for caret placement
7782 * within @cmd.
7783 *
7784 * Returns the index within @cmd of the first occurrence of @str or 0
7785 * if @str was not found.
7786 */
7787 unsigned int err_pos(char *cmd, const char *str)
7788 {
7789 char *found;
7790
7791 if (WARN_ON(!strlen(cmd)))
7792 return 0;
7793
7794 found = strstr(cmd, str);
7795 if (found)
7796 return found - cmd;
7797
7798 return 0;
7799 }
7800
7801 /**
7802 * tracing_log_err - write an error to the tracing error log
7803 * @tr: The associated trace array for the error (NULL for top level array)
7804 * @loc: A string describing where the error occurred
7805 * @cmd: The tracing command that caused the error
7806 * @errs: The array of loc-specific static error strings
7807 * @type: The index into errs[], which produces the specific static err string
7808 * @pos: The position the caret should be placed in the cmd
7809 *
7810 * Writes an error into tracing/error_log of the form:
7811 *
7812 * <loc>: error: <text>
7813 * Command: <cmd>
7814 * ^
7815 *
7816 * tracing/error_log is a small log file containing the last
7817 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7818 * unless there has been a tracing error, and the error log can be
7819 * cleared and have its memory freed by writing the empty string in
7820 * truncation mode to it i.e. echo > tracing/error_log.
7821 *
7822 * NOTE: the @errs array along with the @type param are used to
7823 * produce a static error string - this string is not copied and saved
7824 * when the error is logged - only a pointer to it is saved. See
7825 * existing callers for examples of how static strings are typically
7826 * defined for use with tracing_log_err().
7827 */
7828 void tracing_log_err(struct trace_array *tr,
7829 const char *loc, const char *cmd,
7830 const char **errs, u8 type, u8 pos)
7831 {
7832 struct tracing_log_err *err;
7833
7834 if (!tr)
7835 tr = &global_trace;
7836
7837 mutex_lock(&tracing_err_log_lock);
7838 err = get_tracing_log_err(tr);
7839 if (PTR_ERR(err) == -ENOMEM) {
7840 mutex_unlock(&tracing_err_log_lock);
7841 return;
7842 }
7843
7844 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7845 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7846
7847 err->info.errs = errs;
7848 err->info.type = type;
7849 err->info.pos = pos;
7850 err->info.ts = local_clock();
7851
7852 list_add_tail(&err->list, &tr->err_log);
7853 mutex_unlock(&tracing_err_log_lock);
7854 }
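/*
 * Illustrative usage (not part of the kernel source), per the
 * tracing_log_err() kernel-doc above: the last TRACING_LOG_ERRS_MAX
 * errors can be read from "error_log", and truncating the file clears
 * them and frees their memory:
 *
 *   cat /sys/kernel/tracing/error_log
 *   echo > /sys/kernel/tracing/error_log
 */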
7855
7856 static void clear_tracing_err_log(struct trace_array *tr)
7857 {
7858 struct tracing_log_err *err, *next;
7859
7860 mutex_lock(&tracing_err_log_lock);
7861 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7862 list_del(&err->list);
7863 kfree(err);
7864 }
7865
7866 tr->n_err_log_entries = 0;
7867 mutex_unlock(&tracing_err_log_lock);
7868 }
7869
7870 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7871 {
7872 struct trace_array *tr = m->private;
7873
7874 mutex_lock(&tracing_err_log_lock);
7875
7876 return seq_list_start(&tr->err_log, *pos);
7877 }
7878
7879 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7880 {
7881 struct trace_array *tr = m->private;
7882
7883 return seq_list_next(v, &tr->err_log, pos);
7884 }
7885
7886 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7887 {
7888 mutex_unlock(&tracing_err_log_lock);
7889 }
7890
7891 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7892 {
7893 u8 i;
7894
7895 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7896 seq_putc(m, ' ');
7897 for (i = 0; i < pos; i++)
7898 seq_putc(m, ' ');
7899 seq_puts(m, "^\n");
7900 }
7901
7902 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7903 {
7904 struct tracing_log_err *err = v;
7905
7906 if (err) {
7907 const char *err_text = err->info.errs[err->info.type];
7908 u64 sec = err->info.ts;
7909 u32 nsec;
7910
7911 nsec = do_div(sec, NSEC_PER_SEC);
7912 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7913 err->loc, err_text);
7914 seq_printf(m, "%s", err->cmd);
7915 tracing_err_log_show_pos(m, err->info.pos);
7916 }
7917
7918 return 0;
7919 }
7920
7921 static const struct seq_operations tracing_err_log_seq_ops = {
7922 .start = tracing_err_log_seq_start,
7923 .next = tracing_err_log_seq_next,
7924 .stop = tracing_err_log_seq_stop,
7925 .show = tracing_err_log_seq_show
7926 };
7927
7928 static int tracing_err_log_open(struct inode *inode, struct file *file)
7929 {
7930 struct trace_array *tr = inode->i_private;
7931 int ret = 0;
7932
7933 ret = tracing_check_open_get_tr(tr);
7934 if (ret)
7935 return ret;
7936
7937 /* If this file was opened for write, then erase contents */
7938 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7939 clear_tracing_err_log(tr);
7940
7941 if (file->f_mode & FMODE_READ) {
7942 ret = seq_open(file, &tracing_err_log_seq_ops);
7943 if (!ret) {
7944 struct seq_file *m = file->private_data;
7945 m->private = tr;
7946 } else {
7947 trace_array_put(tr);
7948 }
7949 }
7950 return ret;
7951 }
7952
7953 static ssize_t tracing_err_log_write(struct file *file,
7954 const char __user *buffer,
7955 size_t count, loff_t *ppos)
7956 {
7957 return count;
7958 }
7959
7960 static int tracing_err_log_release(struct inode *inode, struct file *file)
7961 {
7962 struct trace_array *tr = inode->i_private;
7963
7964 trace_array_put(tr);
7965
7966 if (file->f_mode & FMODE_READ)
7967 seq_release(inode, file);
7968
7969 return 0;
7970 }
7971
7972 static const struct file_operations tracing_err_log_fops = {
7973 .open = tracing_err_log_open,
7974 .write = tracing_err_log_write,
7975 .read = seq_read,
7976 .llseek = seq_lseek,
7977 .release = tracing_err_log_release,
7978 };
7979
7980 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7981 {
7982 struct trace_array *tr = inode->i_private;
7983 struct ftrace_buffer_info *info;
7984 int ret;
7985
7986 ret = tracing_check_open_get_tr(tr);
7987 if (ret)
7988 return ret;
7989
7990 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7991 if (!info) {
7992 trace_array_put(tr);
7993 return -ENOMEM;
7994 }
7995
7996 mutex_lock(&trace_types_lock);
7997
7998 info->iter.tr = tr;
7999 info->iter.cpu_file = tracing_get_cpu(inode);
8000 info->iter.trace = tr->current_trace;
8001 info->iter.array_buffer = &tr->array_buffer;
8002 info->spare = NULL;
8003 /* Force reading ring buffer for first read */
8004 info->read = (unsigned int)-1;
8005
8006 filp->private_data = info;
8007
8008 tr->trace_ref++;
8009
8010 mutex_unlock(&trace_types_lock);
8011
8012 ret = nonseekable_open(inode, filp);
8013 if (ret < 0)
8014 trace_array_put(tr);
8015
8016 return ret;
8017 }
8018
8019 static __poll_t
8020 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8021 {
8022 struct ftrace_buffer_info *info = filp->private_data;
8023 struct trace_iterator *iter = &info->iter;
8024
8025 return trace_poll(iter, filp, poll_table);
8026 }
8027
8028 static ssize_t
8029 tracing_buffers_read(struct file *filp, char __user *ubuf,
8030 size_t count, loff_t *ppos)
8031 {
8032 struct ftrace_buffer_info *info = filp->private_data;
8033 struct trace_iterator *iter = &info->iter;
8034 ssize_t ret = 0;
8035 ssize_t size;
8036
8037 if (!count)
8038 return 0;
8039
8040 #ifdef CONFIG_TRACER_MAX_TRACE
8041 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8042 return -EBUSY;
8043 #endif
8044
8045 if (!info->spare) {
8046 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8047 iter->cpu_file);
8048 if (IS_ERR(info->spare)) {
8049 ret = PTR_ERR(info->spare);
8050 info->spare = NULL;
8051 } else {
8052 info->spare_cpu = iter->cpu_file;
8053 }
8054 }
8055 if (!info->spare)
8056 return ret;
8057
8058 /* Do we have previous read data to read? */
8059 if (info->read < PAGE_SIZE)
8060 goto read;
8061
8062 again:
8063 trace_access_lock(iter->cpu_file);
8064 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8065 &info->spare,
8066 count,
8067 iter->cpu_file, 0);
8068 trace_access_unlock(iter->cpu_file);
8069
8070 if (ret < 0) {
8071 if (trace_empty(iter)) {
8072 if ((filp->f_flags & O_NONBLOCK))
8073 return -EAGAIN;
8074
8075 ret = wait_on_pipe(iter, 0);
8076 if (ret)
8077 return ret;
8078
8079 goto again;
8080 }
8081 return 0;
8082 }
8083
8084 info->read = 0;
8085 read:
8086 size = PAGE_SIZE - info->read;
8087 if (size > count)
8088 size = count;
8089
8090 ret = copy_to_user(ubuf, info->spare + info->read, size);
8091 if (ret == size)
8092 return -EFAULT;
8093
8094 size -= ret;
8095
8096 *ppos += size;
8097 info->read += size;
8098
8099 return size;
8100 }
8101
8102 static int tracing_buffers_release(struct inode *inode, struct file *file)
8103 {
8104 struct ftrace_buffer_info *info = file->private_data;
8105 struct trace_iterator *iter = &info->iter;
8106
8107 mutex_lock(&trace_types_lock);
8108
8109 iter->tr->trace_ref--;
8110
8111 __trace_array_put(iter->tr);
8112
8113 if (info->spare)
8114 ring_buffer_free_read_page(iter->array_buffer->buffer,
8115 info->spare_cpu, info->spare);
8116 kvfree(info);
8117
8118 mutex_unlock(&trace_types_lock);
8119
8120 return 0;
8121 }
8122
8123 struct buffer_ref {
8124 struct trace_buffer *buffer;
8125 void *page;
8126 int cpu;
8127 refcount_t refcount;
8128 };
8129
8130 static void buffer_ref_release(struct buffer_ref *ref)
8131 {
8132 if (!refcount_dec_and_test(&ref->refcount))
8133 return;
8134 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8135 kfree(ref);
8136 }
8137
8138 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8139 struct pipe_buffer *buf)
8140 {
8141 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8142
8143 buffer_ref_release(ref);
8144 buf->private = 0;
8145 }
8146
8147 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8148 struct pipe_buffer *buf)
8149 {
8150 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8151
8152 if (refcount_read(&ref->refcount) > INT_MAX/2)
8153 return false;
8154
8155 refcount_inc(&ref->refcount);
8156 return true;
8157 }
8158
8159 /* Pipe buffer operations for a buffer. */
8160 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8161 .release = buffer_pipe_buf_release,
8162 .get = buffer_pipe_buf_get,
8163 };
8164
8165 /*
8166 * Callback from splice_to_pipe(), if we need to release some pages
8167 * at the end of the spd in case we errored out while filling the pipe.
8168 */
8169 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8170 {
8171 struct buffer_ref *ref =
8172 (struct buffer_ref *)spd->partial[i].private;
8173
8174 buffer_ref_release(ref);
8175 spd->partial[i].private = 0;
8176 }
8177
8178 static ssize_t
8179 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8180 struct pipe_inode_info *pipe, size_t len,
8181 unsigned int flags)
8182 {
8183 struct ftrace_buffer_info *info = file->private_data;
8184 struct trace_iterator *iter = &info->iter;
8185 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8186 struct page *pages_def[PIPE_DEF_BUFFERS];
8187 struct splice_pipe_desc spd = {
8188 .pages = pages_def,
8189 .partial = partial_def,
8190 .nr_pages_max = PIPE_DEF_BUFFERS,
8191 .ops = &buffer_pipe_buf_ops,
8192 .spd_release = buffer_spd_release,
8193 };
8194 struct buffer_ref *ref;
8195 int entries, i;
8196 ssize_t ret = 0;
8197
8198 #ifdef CONFIG_TRACER_MAX_TRACE
8199 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8200 return -EBUSY;
8201 #endif
8202
8203 if (*ppos & (PAGE_SIZE - 1))
8204 return -EINVAL;
8205
8206 if (len & (PAGE_SIZE - 1)) {
8207 if (len < PAGE_SIZE)
8208 return -EINVAL;
8209 len &= PAGE_MASK;
8210 }
8211
8212 if (splice_grow_spd(pipe, &spd))
8213 return -ENOMEM;
8214
8215 again:
8216 trace_access_lock(iter->cpu_file);
8217 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8218
8219 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8220 struct page *page;
8221 int r;
8222
8223 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8224 if (!ref) {
8225 ret = -ENOMEM;
8226 break;
8227 }
8228
8229 refcount_set(&ref->refcount, 1);
8230 ref->buffer = iter->array_buffer->buffer;
8231 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8232 if (IS_ERR(ref->page)) {
8233 ret = PTR_ERR(ref->page);
8234 ref->page = NULL;
8235 kfree(ref);
8236 break;
8237 }
8238 ref->cpu = iter->cpu_file;
8239
8240 r = ring_buffer_read_page(ref->buffer, &ref->page,
8241 len, iter->cpu_file, 1);
8242 if (r < 0) {
8243 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8244 ref->page);
8245 kfree(ref);
8246 break;
8247 }
8248
8249 page = virt_to_page(ref->page);
8250
8251 spd.pages[i] = page;
8252 spd.partial[i].len = PAGE_SIZE;
8253 spd.partial[i].offset = 0;
8254 spd.partial[i].private = (unsigned long)ref;
8255 spd.nr_pages++;
8256 *ppos += PAGE_SIZE;
8257
8258 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8259 }
8260
8261 trace_access_unlock(iter->cpu_file);
8262 spd.nr_pages = i;
8263
8264 /* did we read anything? */
8265 if (!spd.nr_pages) {
8266 if (ret)
8267 goto out;
8268
8269 ret = -EAGAIN;
8270 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8271 goto out;
8272
8273 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8274 if (ret)
8275 goto out;
8276
8277 goto again;
8278 }
8279
8280 ret = splice_to_pipe(pipe, &spd);
8281 out:
8282 splice_shrink_spd(&spd);
8283
8284 return ret;
8285 }
8286
8287 static const struct file_operations tracing_buffers_fops = {
8288 .open = tracing_buffers_open,
8289 .read = tracing_buffers_read,
8290 .poll = tracing_buffers_poll,
8291 .release = tracing_buffers_release,
8292 .splice_read = tracing_buffers_splice_read,
8293 .llseek = no_llseek,
8294 };
8295
8296 static ssize_t
8297 tracing_stats_read(struct file *filp, char __user *ubuf,
8298 size_t count, loff_t *ppos)
8299 {
8300 struct inode *inode = file_inode(filp);
8301 struct trace_array *tr = inode->i_private;
8302 struct array_buffer *trace_buf = &tr->array_buffer;
8303 int cpu = tracing_get_cpu(inode);
8304 struct trace_seq *s;
8305 unsigned long cnt;
8306 unsigned long long t;
8307 unsigned long usec_rem;
8308
8309 s = kmalloc(sizeof(*s), GFP_KERNEL);
8310 if (!s)
8311 return -ENOMEM;
8312
8313 trace_seq_init(s);
8314
8315 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8316 trace_seq_printf(s, "entries: %ld\n", cnt);
8317
8318 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8319 trace_seq_printf(s, "overrun: %ld\n", cnt);
8320
8321 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8322 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8323
8324 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8325 trace_seq_printf(s, "bytes: %ld\n", cnt);
8326
8327 if (trace_clocks[tr->clock_id].in_ns) {
8328 /* local or global for trace_clock */
8329 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8330 usec_rem = do_div(t, USEC_PER_SEC);
8331 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8332 t, usec_rem);
8333
8334 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8335 usec_rem = do_div(t, USEC_PER_SEC);
8336 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8337 } else {
8338 /* counter or tsc mode for trace_clock */
8339 trace_seq_printf(s, "oldest event ts: %llu\n",
8340 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8341
8342 trace_seq_printf(s, "now ts: %llu\n",
8343 ring_buffer_time_stamp(trace_buf->buffer));
8344 }
8345
8346 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8347 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8348
8349 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8350 trace_seq_printf(s, "read events: %ld\n", cnt);
8351
8352 count = simple_read_from_buffer(ubuf, count, ppos,
8353 s->buffer, trace_seq_used(s));
8354
8355 kfree(s);
8356
8357 return count;
8358 }
8359
8360 static const struct file_operations tracing_stats_fops = {
8361 .open = tracing_open_generic_tr,
8362 .read = tracing_stats_read,
8363 .llseek = generic_file_llseek,
8364 .release = tracing_release_generic_tr,
8365 };
8366
8367 #ifdef CONFIG_DYNAMIC_FTRACE
8368
8369 static ssize_t
8370 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8371 size_t cnt, loff_t *ppos)
8372 {
8373 ssize_t ret;
8374 char *buf;
8375 int r;
8376
8377 /* 256 should be plenty to hold the amount needed */
8378 buf = kmalloc(256, GFP_KERNEL);
8379 if (!buf)
8380 return -ENOMEM;
8381
8382 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8383 ftrace_update_tot_cnt,
8384 ftrace_number_of_pages,
8385 ftrace_number_of_groups);
8386
8387 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8388 kfree(buf);
8389 return ret;
8390 }
8391
8392 static const struct file_operations tracing_dyn_info_fops = {
8393 .open = tracing_open_generic,
8394 .read = tracing_read_dyn_info,
8395 .llseek = generic_file_llseek,
8396 };
8397 #endif /* CONFIG_DYNAMIC_FTRACE */
8398
8399 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8400 static void
8401 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8402 struct trace_array *tr, struct ftrace_probe_ops *ops,
8403 void *data)
8404 {
8405 tracing_snapshot_instance(tr);
8406 }
8407
8408 static void
8409 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8410 struct trace_array *tr, struct ftrace_probe_ops *ops,
8411 void *data)
8412 {
8413 struct ftrace_func_mapper *mapper = data;
8414 long *count = NULL;
8415
8416 if (mapper)
8417 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8418
8419 if (count) {
8420
8421 if (*count <= 0)
8422 return;
8423
8424 (*count)--;
8425 }
8426
8427 tracing_snapshot_instance(tr);
8428 }
8429
8430 static int
8431 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8432 struct ftrace_probe_ops *ops, void *data)
8433 {
8434 struct ftrace_func_mapper *mapper = data;
8435 long *count = NULL;
8436
8437 seq_printf(m, "%ps:", (void *)ip);
8438
8439 seq_puts(m, "snapshot");
8440
8441 if (mapper)
8442 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8443
8444 if (count)
8445 seq_printf(m, ":count=%ld\n", *count);
8446 else
8447 seq_puts(m, ":unlimited\n");
8448
8449 return 0;
8450 }
8451
8452 static int
8453 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8454 unsigned long ip, void *init_data, void **data)
8455 {
8456 struct ftrace_func_mapper *mapper = *data;
8457
8458 if (!mapper) {
8459 mapper = allocate_ftrace_func_mapper();
8460 if (!mapper)
8461 return -ENOMEM;
8462 *data = mapper;
8463 }
8464
8465 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8466 }
8467
8468 static void
8469 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8470 unsigned long ip, void *data)
8471 {
8472 struct ftrace_func_mapper *mapper = data;
8473
8474 if (!ip) {
8475 if (!mapper)
8476 return;
8477 free_ftrace_func_mapper(mapper, NULL);
8478 return;
8479 }
8480
8481 ftrace_func_mapper_remove_ip(mapper, ip);
8482 }
8483
8484 static struct ftrace_probe_ops snapshot_probe_ops = {
8485 .func = ftrace_snapshot,
8486 .print = ftrace_snapshot_print,
8487 };
8488
8489 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8490 .func = ftrace_count_snapshot,
8491 .print = ftrace_snapshot_print,
8492 .init = ftrace_snapshot_init,
8493 .free = ftrace_snapshot_free,
8494 };
8495
8496 static int
8497 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8498 char *glob, char *cmd, char *param, int enable)
8499 {
8500 struct ftrace_probe_ops *ops;
8501 void *count = (void *)-1;
8502 char *number;
8503 int ret;
8504
8505 if (!tr)
8506 return -ENODEV;
8507
8508 /* hash funcs only work with set_ftrace_filter */
8509 if (!enable)
8510 return -EINVAL;
8511
8512 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8513
8514 if (glob[0] == '!')
8515 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8516
8517 if (!param)
8518 goto out_reg;
8519
8520 number = strsep(&param, ":");
8521
8522 if (!strlen(number))
8523 goto out_reg;
8524
8525 /*
8526 * We use the callback data field (which is a pointer)
8527 * as our counter.
8528 */
8529 ret = kstrtoul(number, 0, (unsigned long *)&count);
8530 if (ret)
8531 return ret;
8532
8533 out_reg:
8534 ret = tracing_alloc_snapshot_instance(tr);
8535 if (ret < 0)
8536 goto out;
8537
8538 ret = register_ftrace_function_probe(glob, tr, ops, count);
8539
8540 out:
8541 return ret < 0 ? ret : 0;
8542 }
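/*
 * A minimal user-space sketch of the command string this callback parses.
 * It assumes tracefs is mounted at /sys/kernel/tracing; the traced function
 * ("schedule") and the count (3) are arbitrary examples.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/set_ftrace_filter", "w");

	if (!f)
		return 1;
	/* Take a snapshot on each of the next 3 hits of schedule(). */
	fprintf(f, "schedule:snapshot:3\n");
	/* Writing "!schedule:snapshot" later would unregister the probe. */
	return fclose(f) ? 1 : 0;
}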
8543
8544 static struct ftrace_func_command ftrace_snapshot_cmd = {
8545 .name = "snapshot",
8546 .func = ftrace_trace_snapshot_callback,
8547 };
8548
8549 static __init int register_snapshot_cmd(void)
8550 {
8551 return register_ftrace_command(&ftrace_snapshot_cmd);
8552 }
8553 #else
8554 static inline __init int register_snapshot_cmd(void) { return 0; }
8555 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8556
8557 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8558 {
8559 if (WARN_ON(!tr->dir))
8560 return ERR_PTR(-ENODEV);
8561
8562 /* Top directory uses NULL as the parent */
8563 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8564 return NULL;
8565
8566 /* All sub buffers have a descriptor */
8567 return tr->dir;
8568 }
8569
8570 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8571 {
8572 struct dentry *d_tracer;
8573
8574 if (tr->percpu_dir)
8575 return tr->percpu_dir;
8576
8577 d_tracer = tracing_get_dentry(tr);
8578 if (IS_ERR(d_tracer))
8579 return NULL;
8580
8581 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8582
8583 MEM_FAIL(!tr->percpu_dir,
8584 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8585
8586 return tr->percpu_dir;
8587 }
8588
8589 static struct dentry *
8590 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8591 void *data, long cpu, const struct file_operations *fops)
8592 {
8593 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8594
8595 if (ret) /* See tracing_get_cpu() */
8596 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8597 return ret;
8598 }
8599
8600 static void
8601 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8602 {
8603 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8604 struct dentry *d_cpu;
8605 char cpu_dir[30]; /* 30 characters should be more than enough */
8606
8607 if (!d_percpu)
8608 return;
8609
8610 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8611 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8612 if (!d_cpu) {
8613 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8614 return;
8615 }
8616
8617 /* per cpu trace_pipe */
8618 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8619 tr, cpu, &tracing_pipe_fops);
8620
8621 /* per cpu trace */
8622 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8623 tr, cpu, &tracing_fops);
8624
8625 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8626 tr, cpu, &tracing_buffers_fops);
8627
8628 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8629 tr, cpu, &tracing_stats_fops);
8630
8631 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8632 tr, cpu, &tracing_entries_fops);
8633
8634 #ifdef CONFIG_TRACER_SNAPSHOT
8635 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8636 tr, cpu, &snapshot_fops);
8637
8638 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8639 tr, cpu, &snapshot_raw_fops);
8640 #endif
8641 }
8642
8643 #ifdef CONFIG_FTRACE_SELFTEST
8644 /* Let selftest have access to static functions in this file */
8645 #include "trace_selftest.c"
8646 #endif
8647
8648 static ssize_t
8649 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8650 loff_t *ppos)
8651 {
8652 struct trace_option_dentry *topt = filp->private_data;
8653 char *buf;
8654
8655 if (topt->flags->val & topt->opt->bit)
8656 buf = "1\n";
8657 else
8658 buf = "0\n";
8659
8660 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8661 }
8662
8663 static ssize_t
8664 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8665 loff_t *ppos)
8666 {
8667 struct trace_option_dentry *topt = filp->private_data;
8668 unsigned long val;
8669 int ret;
8670
8671 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8672 if (ret)
8673 return ret;
8674
8675 if (val != 0 && val != 1)
8676 return -EINVAL;
8677
8678 if (!!(topt->flags->val & topt->opt->bit) != val) {
8679 mutex_lock(&trace_types_lock);
8680 ret = __set_tracer_option(topt->tr, topt->flags,
8681 topt->opt, !val);
8682 mutex_unlock(&trace_types_lock);
8683 if (ret)
8684 return ret;
8685 }
8686
8687 *ppos += cnt;
8688
8689 return cnt;
8690 }
8691
8692
8693 static const struct file_operations trace_options_fops = {
8694 .open = tracing_open_generic,
8695 .read = trace_options_read,
8696 .write = trace_options_write,
8697 .llseek = generic_file_llseek,
8698 };
8699
8700 /*
8701 * In order to pass in both the trace_array descriptor as well as the index
8702 * to the flag that the trace option file represents, the trace_array
8703 * has a character array of trace_flags_index[], which holds the index
8704 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8705 * The address of this character array is passed to the flag option file
8706 * read/write callbacks.
8707 *
8708 * In order to extract both the index and the trace_array descriptor,
8709 * get_tr_index() uses the following algorithm.
8710 *
8711 * idx = *ptr;
8712 *
8713 * This works because the pointer is the address of an element of the
8714 * index array, and each element stores its own position (index[1] == 1).
8715 *
8716 * To get back to the trace_array descriptor, subtract that index from
8717 * the pointer, which lands on the start of the index array itself:
8718 *
8719 * ptr - idx == &index[0]
8720 *
8721 * A simple container_of() on that pointer then yields the
8722 * trace_array descriptor.
8723 */
8724 static void get_tr_index(void *data, struct trace_array **ptr,
8725 unsigned int *pindex)
8726 {
8727 *pindex = *(unsigned char *)data;
8728
8729 *ptr = container_of(data - *pindex, struct trace_array,
8730 trace_flags_index);
8731 }
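/*
 * A minimal sketch of the round trip described above. The helper name is
 * hypothetical and nothing calls it; it only illustrates that the address
 * of index[i] is enough to recover both i and the enclosing trace_array.
 */
static inline void example_tr_index_round_trip(struct trace_array *tr)
{
	void *data = &tr->trace_flags_index[3];	/* what the option file stores */
	struct trace_array *found;
	unsigned int idx;

	get_tr_index(data, &found, &idx);
	WARN_ON(idx != 3 || found != tr);	/* idx == 3, found == tr */
}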
8732
8733 static ssize_t
8734 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8735 loff_t *ppos)
8736 {
8737 void *tr_index = filp->private_data;
8738 struct trace_array *tr;
8739 unsigned int index;
8740 char *buf;
8741
8742 get_tr_index(tr_index, &tr, &index);
8743
8744 if (tr->trace_flags & (1 << index))
8745 buf = "1\n";
8746 else
8747 buf = "0\n";
8748
8749 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8750 }
8751
8752 static ssize_t
8753 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8754 loff_t *ppos)
8755 {
8756 void *tr_index = filp->private_data;
8757 struct trace_array *tr;
8758 unsigned int index;
8759 unsigned long val;
8760 int ret;
8761
8762 get_tr_index(tr_index, &tr, &index);
8763
8764 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8765 if (ret)
8766 return ret;
8767
8768 if (val != 0 && val != 1)
8769 return -EINVAL;
8770
8771 mutex_lock(&event_mutex);
8772 mutex_lock(&trace_types_lock);
8773 ret = set_tracer_flag(tr, 1 << index, val);
8774 mutex_unlock(&trace_types_lock);
8775 mutex_unlock(&event_mutex);
8776
8777 if (ret < 0)
8778 return ret;
8779
8780 *ppos += cnt;
8781
8782 return cnt;
8783 }
8784
8785 static const struct file_operations trace_options_core_fops = {
8786 .open = tracing_open_generic,
8787 .read = trace_options_core_read,
8788 .write = trace_options_core_write,
8789 .llseek = generic_file_llseek,
8790 };
8791
8792 struct dentry *trace_create_file(const char *name,
8793 umode_t mode,
8794 struct dentry *parent,
8795 void *data,
8796 const struct file_operations *fops)
8797 {
8798 struct dentry *ret;
8799
8800 ret = tracefs_create_file(name, mode, parent, data, fops);
8801 if (!ret)
8802 pr_warn("Could not create tracefs '%s' entry\n", name);
8803
8804 return ret;
8805 }
8806
8807
8808 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8809 {
8810 struct dentry *d_tracer;
8811
8812 if (tr->options)
8813 return tr->options;
8814
8815 d_tracer = tracing_get_dentry(tr);
8816 if (IS_ERR(d_tracer))
8817 return NULL;
8818
8819 tr->options = tracefs_create_dir("options", d_tracer);
8820 if (!tr->options) {
8821 pr_warn("Could not create tracefs directory 'options'\n");
8822 return NULL;
8823 }
8824
8825 return tr->options;
8826 }
8827
8828 static void
8829 create_trace_option_file(struct trace_array *tr,
8830 struct trace_option_dentry *topt,
8831 struct tracer_flags *flags,
8832 struct tracer_opt *opt)
8833 {
8834 struct dentry *t_options;
8835
8836 t_options = trace_options_init_dentry(tr);
8837 if (!t_options)
8838 return;
8839
8840 topt->flags = flags;
8841 topt->opt = opt;
8842 topt->tr = tr;
8843
8844 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8845 t_options, topt, &trace_options_fops);
8846
8847 }
8848
8849 static void
8850 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8851 {
8852 struct trace_option_dentry *topts;
8853 struct trace_options *tr_topts;
8854 struct tracer_flags *flags;
8855 struct tracer_opt *opts;
8856 int cnt;
8857 int i;
8858
8859 if (!tracer)
8860 return;
8861
8862 flags = tracer->flags;
8863
8864 if (!flags || !flags->opts)
8865 return;
8866
8867 /*
8868 * If this is an instance, only create flags for tracers
8869 * the instance may have.
8870 */
8871 if (!trace_ok_for_array(tracer, tr))
8872 return;
8873
8874 for (i = 0; i < tr->nr_topts; i++) {
8875 /* Make sure there are no duplicate flags. */
8876 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8877 return;
8878 }
8879
8880 opts = flags->opts;
8881
8882 for (cnt = 0; opts[cnt].name; cnt++)
8883 ;
8884
8885 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8886 if (!topts)
8887 return;
8888
8889 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8890 GFP_KERNEL);
8891 if (!tr_topts) {
8892 kfree(topts);
8893 return;
8894 }
8895
8896 tr->topts = tr_topts;
8897 tr->topts[tr->nr_topts].tracer = tracer;
8898 tr->topts[tr->nr_topts].topts = topts;
8899 tr->nr_topts++;
8900
8901 for (cnt = 0; opts[cnt].name; cnt++) {
8902 create_trace_option_file(tr, &topts[cnt], flags,
8903 &opts[cnt]);
8904 MEM_FAIL(topts[cnt].entry == NULL,
8905 "Failed to create trace option: %s",
8906 opts[cnt].name);
8907 }
8908 }
8909
8910 static struct dentry *
8911 create_trace_option_core_file(struct trace_array *tr,
8912 const char *option, long index)
8913 {
8914 struct dentry *t_options;
8915
8916 t_options = trace_options_init_dentry(tr);
8917 if (!t_options)
8918 return NULL;
8919
8920 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8921 (void *)&tr->trace_flags_index[index],
8922 &trace_options_core_fops);
8923 }
8924
8925 static void create_trace_options_dir(struct trace_array *tr)
8926 {
8927 struct dentry *t_options;
8928 bool top_level = tr == &global_trace;
8929 int i;
8930
8931 t_options = trace_options_init_dentry(tr);
8932 if (!t_options)
8933 return;
8934
8935 for (i = 0; trace_options[i]; i++) {
8936 if (top_level ||
8937 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8938 create_trace_option_core_file(tr, trace_options[i], i);
8939 }
8940 }
8941
8942 static ssize_t
8943 rb_simple_read(struct file *filp, char __user *ubuf,
8944 size_t cnt, loff_t *ppos)
8945 {
8946 struct trace_array *tr = filp->private_data;
8947 char buf[64];
8948 int r;
8949
8950 r = tracer_tracing_is_on(tr);
8951 r = sprintf(buf, "%d\n", r);
8952
8953 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8954 }
8955
8956 static ssize_t
8957 rb_simple_write(struct file *filp, const char __user *ubuf,
8958 size_t cnt, loff_t *ppos)
8959 {
8960 struct trace_array *tr = filp->private_data;
8961 struct trace_buffer *buffer = tr->array_buffer.buffer;
8962 unsigned long val;
8963 int ret;
8964
8965 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8966 if (ret)
8967 return ret;
8968
8969 if (buffer) {
8970 mutex_lock(&trace_types_lock);
8971 if (!!val == tracer_tracing_is_on(tr)) {
8972 val = 0; /* do nothing */
8973 } else if (val) {
8974 tracer_tracing_on(tr);
8975 if (tr->current_trace->start)
8976 tr->current_trace->start(tr);
8977 } else {
8978 tracer_tracing_off(tr);
8979 if (tr->current_trace->stop)
8980 tr->current_trace->stop(tr);
8981 }
8982 mutex_unlock(&trace_types_lock);
8983 }
8984
8985 (*ppos)++;
8986
8987 return cnt;
8988 }
8989
8990 static const struct file_operations rb_simple_fops = {
8991 .open = tracing_open_generic_tr,
8992 .read = rb_simple_read,
8993 .write = rb_simple_write,
8994 .release = tracing_release_generic_tr,
8995 .llseek = default_llseek,
8996 };
8997
8998 static ssize_t
8999 buffer_percent_read(struct file *filp, char __user *ubuf,
9000 size_t cnt, loff_t *ppos)
9001 {
9002 struct trace_array *tr = filp->private_data;
9003 char buf[64];
9004 int r;
9005
9006 r = tr->buffer_percent;
9007 r = sprintf(buf, "%d\n", r);
9008
9009 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9010 }
9011
9012 static ssize_t
9013 buffer_percent_write(struct file *filp, const char __user *ubuf,
9014 size_t cnt, loff_t *ppos)
9015 {
9016 struct trace_array *tr = filp->private_data;
9017 unsigned long val;
9018 int ret;
9019
9020 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9021 if (ret)
9022 return ret;
9023
9024 if (val > 100)
9025 return -EINVAL;
9026
9027 if (!val)
9028 val = 1;
9029
9030 tr->buffer_percent = val;
9031
9032 (*ppos)++;
9033
9034 return cnt;
9035 }
9036
9037 static const struct file_operations buffer_percent_fops = {
9038 .open = tracing_open_generic_tr,
9039 .read = buffer_percent_read,
9040 .write = buffer_percent_write,
9041 .release = tracing_release_generic_tr,
9042 .llseek = default_llseek,
9043 };
9044
9045 static struct dentry *trace_instance_dir;
9046
9047 static void
9048 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9049
9050 static int
9051 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9052 {
9053 enum ring_buffer_flags rb_flags;
9054
9055 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9056
9057 buf->tr = tr;
9058
9059 buf->buffer = ring_buffer_alloc(size, rb_flags);
9060 if (!buf->buffer)
9061 return -ENOMEM;
9062
9063 buf->data = alloc_percpu(struct trace_array_cpu);
9064 if (!buf->data) {
9065 ring_buffer_free(buf->buffer);
9066 buf->buffer = NULL;
9067 return -ENOMEM;
9068 }
9069
9070 /* Allocate the first page for all buffers */
9071 set_buffer_entries(&tr->array_buffer,
9072 ring_buffer_size(tr->array_buffer.buffer, 0));
9073
9074 return 0;
9075 }
9076
9077 static int allocate_trace_buffers(struct trace_array *tr, int size)
9078 {
9079 int ret;
9080
9081 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9082 if (ret)
9083 return ret;
9084
9085 #ifdef CONFIG_TRACER_MAX_TRACE
9086 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9087 allocate_snapshot ? size : 1);
9088 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9089 ring_buffer_free(tr->array_buffer.buffer);
9090 tr->array_buffer.buffer = NULL;
9091 free_percpu(tr->array_buffer.data);
9092 tr->array_buffer.data = NULL;
9093 return -ENOMEM;
9094 }
9095 tr->allocated_snapshot = allocate_snapshot;
9096
9097 /*
9098 * Only the top level trace array gets its snapshot allocated
9099 * from the kernel command line.
9100 */
9101 allocate_snapshot = false;
9102 #endif
9103
9104 return 0;
9105 }
9106
9107 static void free_trace_buffer(struct array_buffer *buf)
9108 {
9109 if (buf->buffer) {
9110 ring_buffer_free(buf->buffer);
9111 buf->buffer = NULL;
9112 free_percpu(buf->data);
9113 buf->data = NULL;
9114 }
9115 }
9116
9117 static void free_trace_buffers(struct trace_array *tr)
9118 {
9119 if (!tr)
9120 return;
9121
9122 free_trace_buffer(&tr->array_buffer);
9123
9124 #ifdef CONFIG_TRACER_MAX_TRACE
9125 free_trace_buffer(&tr->max_buffer);
9126 #endif
9127 }
9128
9129 static void init_trace_flags_index(struct trace_array *tr)
9130 {
9131 int i;
9132
9133 /* Used by the trace options files */
9134 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9135 tr->trace_flags_index[i] = i;
9136 }
9137
9138 static void __update_tracer_options(struct trace_array *tr)
9139 {
9140 struct tracer *t;
9141
9142 for (t = trace_types; t; t = t->next)
9143 add_tracer_options(tr, t);
9144 }
9145
9146 static void update_tracer_options(struct trace_array *tr)
9147 {
9148 mutex_lock(&trace_types_lock);
9149 __update_tracer_options(tr);
9150 mutex_unlock(&trace_types_lock);
9151 }
9152
9153 /* Must have trace_types_lock held */
9154 struct trace_array *trace_array_find(const char *instance)
9155 {
9156 struct trace_array *tr, *found = NULL;
9157
9158 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9159 if (tr->name && strcmp(tr->name, instance) == 0) {
9160 found = tr;
9161 break;
9162 }
9163 }
9164
9165 return found;
9166 }
9167
9168 struct trace_array *trace_array_find_get(const char *instance)
9169 {
9170 struct trace_array *tr;
9171
9172 mutex_lock(&trace_types_lock);
9173 tr = trace_array_find(instance);
9174 if (tr)
9175 tr->ref++;
9176 mutex_unlock(&trace_types_lock);
9177
9178 return tr;
9179 }
9180
9181 static int trace_array_create_dir(struct trace_array *tr)
9182 {
9183 int ret;
9184
9185 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9186 if (!tr->dir)
9187 return -EINVAL;
9188
9189 ret = event_trace_add_tracer(tr->dir, tr);
9190 if (ret) {
9191 tracefs_remove(tr->dir);
9192 return ret;
9193 }
9194
9195 init_tracer_tracefs(tr, tr->dir);
9196 __update_tracer_options(tr);
9197
9198 return ret;
9199 }
9200
9201 static struct trace_array *trace_array_create(const char *name)
9202 {
9203 struct trace_array *tr;
9204 int ret;
9205
9206 ret = -ENOMEM;
9207 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9208 if (!tr)
9209 return ERR_PTR(ret);
9210
9211 tr->name = kstrdup(name, GFP_KERNEL);
9212 if (!tr->name)
9213 goto out_free_tr;
9214
9215 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9216 goto out_free_tr;
9217
9218 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9219
9220 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9221
9222 raw_spin_lock_init(&tr->start_lock);
9223
9224 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9225
9226 tr->current_trace = &nop_trace;
9227
9228 INIT_LIST_HEAD(&tr->systems);
9229 INIT_LIST_HEAD(&tr->events);
9230 INIT_LIST_HEAD(&tr->hist_vars);
9231 INIT_LIST_HEAD(&tr->err_log);
9232
9233 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9234 goto out_free_tr;
9235
9236 if (ftrace_allocate_ftrace_ops(tr) < 0)
9237 goto out_free_tr;
9238
9239 ftrace_init_trace_array(tr);
9240
9241 init_trace_flags_index(tr);
9242
9243 if (trace_instance_dir) {
9244 ret = trace_array_create_dir(tr);
9245 if (ret)
9246 goto out_free_tr;
9247 } else
9248 __trace_early_add_events(tr);
9249
9250 list_add(&tr->list, &ftrace_trace_arrays);
9251
9252 tr->ref++;
9253
9254 return tr;
9255
9256 out_free_tr:
9257 ftrace_free_ftrace_ops(tr);
9258 free_trace_buffers(tr);
9259 free_cpumask_var(tr->tracing_cpumask);
9260 kfree(tr->name);
9261 kfree(tr);
9262
9263 return ERR_PTR(ret);
9264 }
9265
9266 static int instance_mkdir(const char *name)
9267 {
9268 struct trace_array *tr;
9269 int ret;
9270
9271 mutex_lock(&event_mutex);
9272 mutex_lock(&trace_types_lock);
9273
9274 ret = -EEXIST;
9275 if (trace_array_find(name))
9276 goto out_unlock;
9277
9278 tr = trace_array_create(name);
9279
9280 ret = PTR_ERR_OR_ZERO(tr);
9281
9282 out_unlock:
9283 mutex_unlock(&trace_types_lock);
9284 mutex_unlock(&event_mutex);
9285 return ret;
9286 }
9287
9288 /**
9289 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9290 * @name: The name of the trace array to be looked up/created.
9291 *
9292 * Returns a pointer to the trace array with the given name, or
9293 * NULL if it cannot be created.
9294 *
9295 * NOTE: This function increments the reference counter associated with the
9296 * trace array returned. This makes sure it cannot be freed while in use.
9297 * Use trace_array_put() once the trace array is no longer needed.
9298 * If the trace_array is to be freed, trace_array_destroy() needs to
9299 * be called after the trace_array_put(), or simply let user space delete
9300 * it from the tracefs instances directory. But until the
9301 * trace_array_put() is called, user space cannot delete it.
9302 *
9303 */
9304 struct trace_array *trace_array_get_by_name(const char *name)
9305 {
9306 struct trace_array *tr;
9307
9308 mutex_lock(&event_mutex);
9309 mutex_lock(&trace_types_lock);
9310
9311 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9312 if (tr->name && strcmp(tr->name, name) == 0)
9313 goto out_unlock;
9314 }
9315
9316 tr = trace_array_create(name);
9317
9318 if (IS_ERR(tr))
9319 tr = NULL;
9320 out_unlock:
9321 if (tr)
9322 tr->ref++;
9323
9324 mutex_unlock(&trace_types_lock);
9325 mutex_unlock(&event_mutex);
9326 return tr;
9327 }
9328 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
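/*
 * A minimal sketch of the lifecycle described in the kerneldoc above, as it
 * might appear in a module. The instance name "my_instance" and the helper
 * names are hypothetical.
 */
static struct trace_array *my_tr;

static int example_instance_setup(void)
{
	/* Creates the instance if it does not exist yet; takes a reference. */
	my_tr = trace_array_get_by_name("my_instance");
	if (!my_tr)
		return -ENOMEM;
	return 0;
}

static void example_instance_teardown(void)
{
	/* Drop the reference first, then request removal of the instance. */
	trace_array_put(my_tr);
	if (trace_array_destroy(my_tr))
		pr_warn("my_instance busy or missing, not destroyed\n");
}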
9329
9330 static int __remove_instance(struct trace_array *tr)
9331 {
9332 int i;
9333
9334 /* Reference counter for a newly created trace array = 1. */
9335 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9336 return -EBUSY;
9337
9338 list_del(&tr->list);
9339
9340 /* Disable all the flags that were enabled coming in */
9341 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9342 if ((1 << i) & ZEROED_TRACE_FLAGS)
9343 set_tracer_flag(tr, 1 << i, 0);
9344 }
9345
9346 tracing_set_nop(tr);
9347 clear_ftrace_function_probes(tr);
9348 event_trace_del_tracer(tr);
9349 ftrace_clear_pids(tr);
9350 ftrace_destroy_function_files(tr);
9351 tracefs_remove(tr->dir);
9352 free_percpu(tr->last_func_repeats);
9353 free_trace_buffers(tr);
9354
9355 for (i = 0; i < tr->nr_topts; i++) {
9356 kfree(tr->topts[i].topts);
9357 }
9358 kfree(tr->topts);
9359
9360 free_cpumask_var(tr->tracing_cpumask);
9361 kfree(tr->name);
9362 kfree(tr);
9363
9364 return 0;
9365 }
9366
9367 int trace_array_destroy(struct trace_array *this_tr)
9368 {
9369 struct trace_array *tr;
9370 int ret;
9371
9372 if (!this_tr)
9373 return -EINVAL;
9374
9375 mutex_lock(&event_mutex);
9376 mutex_lock(&trace_types_lock);
9377
9378 ret = -ENODEV;
9379
9380 /* Make sure the trace array exists before destroying it. */
9381 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9382 if (tr == this_tr) {
9383 ret = __remove_instance(tr);
9384 break;
9385 }
9386 }
9387
9388 mutex_unlock(&trace_types_lock);
9389 mutex_unlock(&event_mutex);
9390
9391 return ret;
9392 }
9393 EXPORT_SYMBOL_GPL(trace_array_destroy);
9394
9395 static int instance_rmdir(const char *name)
9396 {
9397 struct trace_array *tr;
9398 int ret;
9399
9400 mutex_lock(&event_mutex);
9401 mutex_lock(&trace_types_lock);
9402
9403 ret = -ENODEV;
9404 tr = trace_array_find(name);
9405 if (tr)
9406 ret = __remove_instance(tr);
9407
9408 mutex_unlock(&trace_types_lock);
9409 mutex_unlock(&event_mutex);
9410
9411 return ret;
9412 }
9413
9414 static __init void create_trace_instances(struct dentry *d_tracer)
9415 {
9416 struct trace_array *tr;
9417
9418 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9419 instance_mkdir,
9420 instance_rmdir);
9421 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9422 return;
9423
9424 mutex_lock(&event_mutex);
9425 mutex_lock(&trace_types_lock);
9426
9427 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9428 if (!tr->name)
9429 continue;
9430 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9431 "Failed to create instance directory\n"))
9432 break;
9433 }
9434
9435 mutex_unlock(&trace_types_lock);
9436 mutex_unlock(&event_mutex);
9437 }
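/*
 * A minimal user-space sketch of the instances directory these callbacks
 * back. It assumes tracefs is mounted at /sys/kernel/tracing; the instance
 * name "demo" is arbitrary.
 */
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* Reaches instance_mkdir("demo") via tracefs. */
	if (mkdir("/sys/kernel/tracing/instances/demo", 0755))
		perror("mkdir");

	/* The new directory now has its own trace, trace_pipe, events/, ... */

	/* Reaches instance_rmdir("demo"); fails with EBUSY while in use. */
	if (rmdir("/sys/kernel/tracing/instances/demo"))
		perror("rmdir");
	return 0;
}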
9438
9439 static void
9440 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9441 {
9442 struct trace_event_file *file;
9443 int cpu;
9444
9445 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9446 tr, &show_traces_fops);
9447
9448 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9449 tr, &set_tracer_fops);
9450
9451 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9452 tr, &tracing_cpumask_fops);
9453
9454 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9455 tr, &tracing_iter_fops);
9456
9457 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9458 tr, &tracing_fops);
9459
9460 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9461 tr, &tracing_pipe_fops);
9462
9463 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9464 tr, &tracing_entries_fops);
9465
9466 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9467 tr, &tracing_total_entries_fops);
9468
9469 trace_create_file("free_buffer", 0200, d_tracer,
9470 tr, &tracing_free_buffer_fops);
9471
9472 trace_create_file("trace_marker", 0220, d_tracer,
9473 tr, &tracing_mark_fops);
9474
9475 file = __find_event_file(tr, "ftrace", "print");
9476 if (file && file->dir)
9477 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9478 file, &event_trigger_fops);
9479 tr->trace_marker_file = file;
9480
9481 trace_create_file("trace_marker_raw", 0220, d_tracer,
9482 tr, &tracing_mark_raw_fops);
9483
9484 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9485 &trace_clock_fops);
9486
9487 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9488 tr, &rb_simple_fops);
9489
9490 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9491 &trace_time_stamp_mode_fops);
9492
9493 tr->buffer_percent = 50;
9494
9495 trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9496 tr, &buffer_percent_fops);
9497
9498 create_trace_options_dir(tr);
9499
9500 trace_create_maxlat_file(tr, d_tracer);
9501
9502 if (ftrace_create_function_files(tr, d_tracer))
9503 MEM_FAIL(1, "Could not allocate function filter files");
9504
9505 #ifdef CONFIG_TRACER_SNAPSHOT
9506 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9507 tr, &snapshot_fops);
9508 #endif
9509
9510 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9511 tr, &tracing_err_log_fops);
9512
9513 for_each_tracing_cpu(cpu)
9514 tracing_init_tracefs_percpu(tr, cpu);
9515
9516 ftrace_init_tracefs(tr, d_tracer);
9517 }
9518
9519 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9520 {
9521 struct vfsmount *mnt;
9522 struct file_system_type *type;
9523
9524 /*
9525 * To maintain backward compatibility for tools that mount
9526 * debugfs to get to the tracing facility, tracefs is automatically
9527 * mounted to the debugfs/tracing directory.
9528 */
9529 type = get_fs_type("tracefs");
9530 if (!type)
9531 return NULL;
9532 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9533 put_filesystem(type);
9534 if (IS_ERR(mnt))
9535 return NULL;
9536 mntget(mnt);
9537
9538 return mnt;
9539 }
9540
9541 /**
9542 * tracing_init_dentry - initialize top level trace array
9543 *
9544 * This is called when creating files or directories in the tracing
9545 * directory. It is called via fs_initcall() by any of the boot up code
9546 * and returns 0 once the top level tracing directory has been set up.
9547 */
9548 int tracing_init_dentry(void)
9549 {
9550 struct trace_array *tr = &global_trace;
9551
9552 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9553 pr_warn("Tracing disabled due to lockdown\n");
9554 return -EPERM;
9555 }
9556
9557 /* The top level trace array uses NULL as parent */
9558 if (tr->dir)
9559 return 0;
9560
9561 if (WARN_ON(!tracefs_initialized()))
9562 return -ENODEV;
9563
9564 /*
9565 * As there may still be users that expect the tracing
9566 * files to exist in debugfs/tracing, we must automount
9567 * the tracefs file system there, so older tools still
9568 * work with the newer kernel.
9569 */
9570 tr->dir = debugfs_create_automount("tracing", NULL,
9571 trace_automount, NULL);
9572
9573 return 0;
9574 }
9575
9576 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9577 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9578
9579 static struct workqueue_struct *eval_map_wq __initdata;
9580 static struct work_struct eval_map_work __initdata;
9581
9582 static void __init eval_map_work_func(struct work_struct *work)
9583 {
9584 int len;
9585
9586 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9587 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9588 }
9589
9590 static int __init trace_eval_init(void)
9591 {
9592 INIT_WORK(&eval_map_work, eval_map_work_func);
9593
9594 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9595 if (!eval_map_wq) {
9596 pr_err("Unable to allocate eval_map_wq\n");
9597 /* Do work here */
9598 eval_map_work_func(&eval_map_work);
9599 return -ENOMEM;
9600 }
9601
9602 queue_work(eval_map_wq, &eval_map_work);
9603 return 0;
9604 }
9605
9606 static int __init trace_eval_sync(void)
9607 {
9608 /* Make sure the eval map updates are finished */
9609 if (eval_map_wq)
9610 destroy_workqueue(eval_map_wq);
9611 return 0;
9612 }
9613
9614 late_initcall_sync(trace_eval_sync);
9615
9616
9617 #ifdef CONFIG_MODULES
9618 static void trace_module_add_evals(struct module *mod)
9619 {
9620 if (!mod->num_trace_evals)
9621 return;
9622
9623 /*
9624 * Modules with bad taint do not have events created, so do
9625 * not bother with enums either.
9626 */
9627 if (trace_module_has_bad_taint(mod))
9628 return;
9629
9630 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9631 }
9632
9633 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9634 static void trace_module_remove_evals(struct module *mod)
9635 {
9636 union trace_eval_map_item *map;
9637 union trace_eval_map_item **last = &trace_eval_maps;
9638
9639 if (!mod->num_trace_evals)
9640 return;
9641
9642 mutex_lock(&trace_eval_mutex);
9643
9644 map = trace_eval_maps;
9645
9646 while (map) {
9647 if (map->head.mod == mod)
9648 break;
9649 map = trace_eval_jmp_to_tail(map);
9650 last = &map->tail.next;
9651 map = map->tail.next;
9652 }
9653 if (!map)
9654 goto out;
9655
9656 *last = trace_eval_jmp_to_tail(map)->tail.next;
9657 kfree(map);
9658 out:
9659 mutex_unlock(&trace_eval_mutex);
9660 }
9661 #else
9662 static inline void trace_module_remove_evals(struct module *mod) { }
9663 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9664
9665 static int trace_module_notify(struct notifier_block *self,
9666 unsigned long val, void *data)
9667 {
9668 struct module *mod = data;
9669
9670 switch (val) {
9671 case MODULE_STATE_COMING:
9672 trace_module_add_evals(mod);
9673 break;
9674 case MODULE_STATE_GOING:
9675 trace_module_remove_evals(mod);
9676 break;
9677 }
9678
9679 return NOTIFY_OK;
9680 }
9681
9682 static struct notifier_block trace_module_nb = {
9683 .notifier_call = trace_module_notify,
9684 .priority = 0,
9685 };
9686 #endif /* CONFIG_MODULES */
9687
9688 static __init int tracer_init_tracefs(void)
9689 {
9690 int ret;
9691
9692 trace_access_lock_init();
9693
9694 ret = tracing_init_dentry();
9695 if (ret)
9696 return 0;
9697
9698 event_trace_init();
9699
9700 init_tracer_tracefs(&global_trace, NULL);
9701 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9702
9703 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9704 &global_trace, &tracing_thresh_fops);
9705
9706 trace_create_file("README", TRACE_MODE_READ, NULL,
9707 NULL, &tracing_readme_fops);
9708
9709 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9710 NULL, &tracing_saved_cmdlines_fops);
9711
9712 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9713 NULL, &tracing_saved_cmdlines_size_fops);
9714
9715 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9716 NULL, &tracing_saved_tgids_fops);
9717
9718 trace_eval_init();
9719
9720 trace_create_eval_file(NULL);
9721
9722 #ifdef CONFIG_MODULES
9723 register_module_notifier(&trace_module_nb);
9724 #endif
9725
9726 #ifdef CONFIG_DYNAMIC_FTRACE
9727 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9728 NULL, &tracing_dyn_info_fops);
9729 #endif
9730
9731 create_trace_instances(NULL);
9732
9733 update_tracer_options(&global_trace);
9734
9735 return 0;
9736 }
9737
9738 fs_initcall(tracer_init_tracefs);
9739
9740 static int trace_panic_handler(struct notifier_block *this,
9741 unsigned long event, void *unused)
9742 {
9743 if (ftrace_dump_on_oops)
9744 ftrace_dump(ftrace_dump_on_oops);
9745 return NOTIFY_OK;
9746 }
9747
9748 static struct notifier_block trace_panic_notifier = {
9749 .notifier_call = trace_panic_handler,
9750 .next = NULL,
9751 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9752 };
9753
9754 static int trace_die_handler(struct notifier_block *self,
9755 unsigned long val,
9756 void *data)
9757 {
9758 switch (val) {
9759 case DIE_OOPS:
9760 if (ftrace_dump_on_oops)
9761 ftrace_dump(ftrace_dump_on_oops);
9762 break;
9763 default:
9764 break;
9765 }
9766 return NOTIFY_OK;
9767 }
9768
9769 static struct notifier_block trace_die_notifier = {
9770 .notifier_call = trace_die_handler,
9771 .priority = 200
9772 };
9773
9774 /*
9775 * printk is set to a max of 1024; we really don't need it that big.
9776 * Nothing should be printing 1000 characters anyway.
9777 */
9778 #define TRACE_MAX_PRINT 1000
9779
9780 /*
9781 * Define here KERN_TRACE so that we have one place to modify
9782 * it if we decide to change what log level the ftrace dump
9783 * should be at.
9784 */
9785 #define KERN_TRACE KERN_EMERG
9786
9787 void
9788 trace_printk_seq(struct trace_seq *s)
9789 {
9790 /* Probably should print a warning here. */
9791 if (s->seq.len >= TRACE_MAX_PRINT)
9792 s->seq.len = TRACE_MAX_PRINT;
9793
9794 /*
9795 * More paranoid code. Although the buffer size is set to
9796 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9797 * an extra layer of protection.
9798 */
9799 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9800 s->seq.len = s->seq.size - 1;
9801
9802 /* should be zero terminated, but we are paranoid. */
9803 s->buffer[s->seq.len] = 0;
9804
9805 printk(KERN_TRACE "%s", s->buffer);
9806
9807 trace_seq_init(s);
9808 }
9809
9810 void trace_init_global_iter(struct trace_iterator *iter)
9811 {
9812 iter->tr = &global_trace;
9813 iter->trace = iter->tr->current_trace;
9814 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9815 iter->array_buffer = &global_trace.array_buffer;
9816
9817 if (iter->trace && iter->trace->open)
9818 iter->trace->open(iter);
9819
9820 /* Annotate start of buffers if we had overruns */
9821 if (ring_buffer_overruns(iter->array_buffer->buffer))
9822 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9823
9824 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9825 if (trace_clocks[iter->tr->clock_id].in_ns)
9826 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9827 }
9828
9829 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9830 {
9831 /* use static because iter can be a bit big for the stack */
9832 static struct trace_iterator iter;
9833 static atomic_t dump_running;
9834 struct trace_array *tr = &global_trace;
9835 unsigned int old_userobj;
9836 unsigned long flags;
9837 int cnt = 0, cpu;
9838
9839 /* Only allow one dump user at a time. */
9840 if (atomic_inc_return(&dump_running) != 1) {
9841 atomic_dec(&dump_running);
9842 return;
9843 }
9844
9845 /*
9846 * Always turn off tracing when we dump.
9847 * We don't need to show trace output of what happens
9848 * between multiple crashes.
9849 *
9850 * If the user does a sysrq-z, then they can re-enable
9851 * tracing with echo 1 > tracing_on.
9852 */
9853 tracing_off();
9854
9855 local_irq_save(flags);
9856
9857 /* Simulate the iterator */
9858 trace_init_global_iter(&iter);
9859 /* Can not use kmalloc for iter.temp and iter.fmt */
9860 iter.temp = static_temp_buf;
9861 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9862 iter.fmt = static_fmt_buf;
9863 iter.fmt_size = STATIC_FMT_BUF_SIZE;
9864
9865 for_each_tracing_cpu(cpu) {
9866 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9867 }
9868
9869 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9870
9871 /* don't look at user memory in panic mode */
9872 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9873
9874 switch (oops_dump_mode) {
9875 case DUMP_ALL:
9876 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9877 break;
9878 case DUMP_ORIG:
9879 iter.cpu_file = raw_smp_processor_id();
9880 break;
9881 case DUMP_NONE:
9882 goto out_enable;
9883 default:
9884 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9885 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9886 }
9887
9888 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9889
9890 /* Did function tracer already get disabled? */
9891 if (ftrace_is_dead()) {
9892 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9893 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9894 }
9895
9896 /*
9897 * We need to stop all tracing on all CPUs to read
9898 * the next buffer. This is a bit expensive, but it is
9899 * not done often. We print out all that we can read,
9900 * and then release the locks again.
9901 */
9902
9903 while (!trace_empty(&iter)) {
9904
9905 if (!cnt)
9906 printk(KERN_TRACE "---------------------------------\n");
9907
9908 cnt++;
9909
9910 trace_iterator_reset(&iter);
9911 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9912
9913 if (trace_find_next_entry_inc(&iter) != NULL) {
9914 int ret;
9915
9916 ret = print_trace_line(&iter);
9917 if (ret != TRACE_TYPE_NO_CONSUME)
9918 trace_consume(&iter);
9919 }
9920 touch_nmi_watchdog();
9921
9922 trace_printk_seq(&iter.seq);
9923 }
9924
9925 if (!cnt)
9926 printk(KERN_TRACE " (ftrace buffer empty)\n");
9927 else
9928 printk(KERN_TRACE "---------------------------------\n");
9929
9930 out_enable:
9931 tr->trace_flags |= old_userobj;
9932
9933 for_each_tracing_cpu(cpu) {
9934 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9935 }
9936 atomic_dec(&dump_running);
9937 local_irq_restore(flags);
9938 }
9939 EXPORT_SYMBOL_GPL(ftrace_dump);
9940
9941 #define WRITE_BUFSIZE 4096
9942
9943 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9944 size_t count, loff_t *ppos,
9945 int (*createfn)(const char *))
9946 {
9947 char *kbuf, *buf, *tmp;
9948 int ret = 0;
9949 size_t done = 0;
9950 size_t size;
9951
9952 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9953 if (!kbuf)
9954 return -ENOMEM;
9955
9956 while (done < count) {
9957 size = count - done;
9958
9959 if (size >= WRITE_BUFSIZE)
9960 size = WRITE_BUFSIZE - 1;
9961
9962 if (copy_from_user(kbuf, buffer + done, size)) {
9963 ret = -EFAULT;
9964 goto out;
9965 }
9966 kbuf[size] = '\0';
9967 buf = kbuf;
9968 do {
9969 tmp = strchr(buf, '\n');
9970 if (tmp) {
9971 *tmp = '\0';
9972 size = tmp - buf + 1;
9973 } else {
9974 size = strlen(buf);
9975 if (done + size < count) {
9976 if (buf != kbuf)
9977 break;
9978 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9979 pr_warn("Line length is too long: Should be less than %d\n",
9980 WRITE_BUFSIZE - 2);
9981 ret = -EINVAL;
9982 goto out;
9983 }
9984 }
9985 done += size;
9986
9987 /* Remove comments */
9988 tmp = strchr(buf, '#');
9989
9990 if (tmp)
9991 *tmp = '\0';
9992
9993 ret = createfn(buf);
9994 if (ret)
9995 goto out;
9996 buf += size;
9997
9998 } while (done < count);
9999 }
10000 ret = done;
10001
10002 out:
10003 kfree(kbuf);
10004
10005 return ret;
10006 }
10007
10008 __init static int tracer_alloc_buffers(void)
10009 {
10010 int ring_buf_size;
10011 int ret = -ENOMEM;
10012
10013
10014 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10015 pr_warn("Tracing disabled due to lockdown\n");
10016 return -EPERM;
10017 }
10018
10019 /*
10020 * Make sure we don't accidentally add more trace options
10021 * than we have bits for.
10022 */
10023 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10024
10025 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10026 goto out;
10027
10028 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10029 goto out_free_buffer_mask;
10030
10031 /* Only allocate trace_printk buffers if a trace_printk exists */
10032 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10033 /* Must be called before global_trace.buffer is allocated */
10034 trace_printk_init_buffers();
10035
10036 /* To save memory, keep the ring buffer size to its minimum */
10037 if (ring_buffer_expanded)
10038 ring_buf_size = trace_buf_size;
10039 else
10040 ring_buf_size = 1;
10041
10042 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10043 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10044
10045 raw_spin_lock_init(&global_trace.start_lock);
10046
10047 /*
10048 * The prepare callback allocates some memory for the ring buffer. We
10049 * don't free the buffer if the CPU goes down. If we were to free
10050 * the buffer, then the user would lose any trace that was in the
10051 * buffer. The memory will be removed once the "instance" is removed.
10052 */
10053 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10054 "trace/RB:preapre", trace_rb_cpu_prepare,
10055 NULL);
10056 if (ret < 0)
10057 goto out_free_cpumask;
10058 /* Used for event triggers */
10059 ret = -ENOMEM;
10060 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10061 if (!temp_buffer)
10062 goto out_rm_hp_state;
10063
10064 if (trace_create_savedcmd() < 0)
10065 goto out_free_temp_buffer;
10066
10067 /* TODO: make the number of buffers hot pluggable with CPUS */
10068 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10069 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10070 goto out_free_savedcmd;
10071 }
10072
10073 if (global_trace.buffer_disabled)
10074 tracing_off();
10075
10076 if (trace_boot_clock) {
10077 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10078 if (ret < 0)
10079 pr_warn("Trace clock %s not defined, going back to default\n",
10080 trace_boot_clock);
10081 }
10082
10083 /*
10084 * register_tracer() might reference current_trace, so it
10085 * needs to be set before we register anything. This is
10086 * just a bootstrap of current_trace anyway.
10087 */
10088 global_trace.current_trace = &nop_trace;
10089
10090 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10091
10092 ftrace_init_global_array_ops(&global_trace);
10093
10094 init_trace_flags_index(&global_trace);
10095
10096 register_tracer(&nop_trace);
10097
10098 /* Function tracing may start here (via kernel command line) */
10099 init_function_trace();
10100
10101 /* All seems OK, enable tracing */
10102 tracing_disabled = 0;
10103
10104 atomic_notifier_chain_register(&panic_notifier_list,
10105 &trace_panic_notifier);
10106
10107 register_die_notifier(&trace_die_notifier);
10108
10109 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10110
10111 INIT_LIST_HEAD(&global_trace.systems);
10112 INIT_LIST_HEAD(&global_trace.events);
10113 INIT_LIST_HEAD(&global_trace.hist_vars);
10114 INIT_LIST_HEAD(&global_trace.err_log);
10115 list_add(&global_trace.list, &ftrace_trace_arrays);
10116
10117 apply_trace_boot_options();
10118
10119 register_snapshot_cmd();
10120
10121 test_can_verify();
10122
10123 return 0;
10124
10125 out_free_savedcmd:
10126 free_saved_cmdlines_buffer(savedcmd);
10127 out_free_temp_buffer:
10128 ring_buffer_free(temp_buffer);
10129 out_rm_hp_state:
10130 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10131 out_free_cpumask:
10132 free_cpumask_var(global_trace.tracing_cpumask);
10133 out_free_buffer_mask:
10134 free_cpumask_var(tracing_buffer_mask);
10135 out:
10136 return ret;
10137 }
10138
10139 void __init early_trace_init(void)
10140 {
10141 if (tracepoint_printk) {
10142 tracepoint_print_iter =
10143 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10144 if (MEM_FAIL(!tracepoint_print_iter,
10145 "Failed to allocate trace iterator\n"))
10146 tracepoint_printk = 0;
10147 else
10148 static_key_enable(&tracepoint_printk_key.key);
10149 }
10150 tracer_alloc_buffers();
10151 }
10152
10153 void __init trace_init(void)
10154 {
10155 trace_event_init();
10156 }
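/*
* Editor's note on boot ordering: early_trace_init() and trace_init() are
* both called directly from start_kernel() in init/main.c --
* early_trace_init() first, early enough for trace_printk() and boot-time
* tracer setup, and trace_init() somewhat later, once trace events can be
* registered. late_trace_init() below then runs as a late initcall via
* late_initcall_sync().
*/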
10157
10158 __init static void clear_boot_tracer(void)
10159 {
10160 /*
10161 * The default bootup tracer name is stored in an init-section buffer.
10162 * This function is called from a late initcall. If the boot tracer
10163 * was never registered, clear the pointer out, to prevent a later
10164 * registration from accessing the buffer that is about to be freed.
10165 */
10166
10167 if (!default_bootup_tracer)
10168 return;
10169
10170 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10171 default_bootup_tracer);
10172 default_bootup_tracer = NULL;
10173 }
10174
10175 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10176 __init static void tracing_set_default_clock(void)
10177 {
10178 /* sched_clock_stable() is determined in late_initcall */
10179 if (!trace_boot_clock && !sched_clock_stable()) {
10180 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10181 pr_warn("Can not set tracing clock due to lockdown\n");
10182 return;
10183 }
10184
10185 printk(KERN_WARNING
10186 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10187 "If you want to keep using the local clock, then add:\n"
10188 " \"trace_clock=local\"\n"
10189 "on the kernel command line\n");
10190 tracing_set_clock(&global_trace, "global");
10191 }
10192 }
10193 #else
10194 static inline void tracing_set_default_clock(void) { }
10195 #endif
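/*
* Editor's note: the boot clock can also be chosen explicitly with the
* trace_clock= kernel parameter (parsed earlier in this file into
* trace_boot_clock), or at run time by writing one of the listed clocks
* to the tracefs trace_clock file, e.g. "echo global > trace_clock" in
* /sys/kernel/tracing.
*/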
10196
10197 __init static int late_trace_init(void)
10198 {
10199 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10200 static_key_disable(&tracepoint_printk_key.key);
10201 tracepoint_printk = 0;
10202 }
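/*
* Editor's note: this handles the "tracepoint printk only during boot"
* request -- if the user enabled piping tracepoints to printk but asked
* for it to stop at boot, the static key is switched back off here, once
* late initcalls run, so tracepoints are no longer echoed to the console.
*/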
10203
10204 tracing_set_default_clock();
10205 clear_boot_tracer();
10206 return 0;
10207 }
10208
10209 late_initcall_sync(late_trace_init);