1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring-buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump the buffers of all CPUs.
134  * Set 2 if you want to dump the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
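/*
 * A rough picture of one saved block (an illustrative sketch based on the
 * description above, not literal memory contents):
 *
 *   trace_eval_maps:  [ head ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *
 * where head.length == N, head.mod is the owning module (or NULL for
 * built-in maps), and tail.next points to the head of the next saved
 * block, if any.
 */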
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static int __init set_cmdline_ftrace(char *str)
191 {
192 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193 	default_bootup_tracer = bootup_tracer_buf;
194 	/* We are using ftrace early, expand it */
195 	ring_buffer_expanded = true;
196 	return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199 
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
203 		ftrace_dump_on_oops = DUMP_ALL;
204 		return 1;
205 	}
206 
207 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208 		ftrace_dump_on_oops = DUMP_ORIG;
209 		return 1;
210 	}
211 
212 	return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
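/*
 * Example kernel command line settings matching the parsing above
 * (illustrative):
 *
 *	ftrace_dump_on_oops			-> DUMP_ALL
 *	ftrace_dump_on_oops=1			-> DUMP_ALL
 *	ftrace_dump_on_oops=orig_cpu		-> DUMP_ORIG
 */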
215 
216 static int __init stop_trace_on_warning(char *str)
217 {
218 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219 		__disable_trace_on_warning = 1;
220 	return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223 
224 static int __init boot_alloc_snapshot(char *str)
225 {
226 	allocate_snapshot = true;
227 	/* We also need the main ring buffer expanded */
228 	ring_buffer_expanded = true;
229 	return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232 
233 
234 static int __init boot_snapshot(char *str)
235 {
236 	snapshot_at_boot = true;
237 	boot_alloc_snapshot(str);
238 	return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241 
242 
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244 
245 static int __init set_trace_boot_options(char *str)
246 {
247 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248 	return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251 
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254 
255 static int __init set_trace_boot_clock(char *str)
256 {
257 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258 	trace_boot_clock = trace_boot_clock_buf;
259 	return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262 
263 static int __init set_tracepoint_printk(char *str)
264 {
265 	/* Ignore the "tp_printk_stop_on_boot" param */
266 	if (*str == '_')
267 		return 0;
268 
269 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270 		tracepoint_printk = 1;
271 	return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274 
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277 	tracepoint_printk_stop_on_boot = true;
278 	return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281 
282 unsigned long long ns2usecs(u64 nsec)
283 {
284 	nsec += 500;
285 	do_div(nsec, 1000);
286 	return nsec;
287 }
288 
289 static void
290 trace_process_export(struct trace_export *export,
291 	       struct ring_buffer_event *event, int flag)
292 {
293 	struct trace_entry *entry;
294 	unsigned int size = 0;
295 
296 	if (export->flags & flag) {
297 		entry = ring_buffer_event_data(event);
298 		size = ring_buffer_event_length(event);
299 		export->write(export, entry, size);
300 	}
301 }
302 
303 static DEFINE_MUTEX(ftrace_export_lock);
304 
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306 
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310 
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313 	if (export->flags & TRACE_EXPORT_FUNCTION)
314 		static_branch_inc(&trace_function_exports_enabled);
315 
316 	if (export->flags & TRACE_EXPORT_EVENT)
317 		static_branch_inc(&trace_event_exports_enabled);
318 
319 	if (export->flags & TRACE_EXPORT_MARKER)
320 		static_branch_inc(&trace_marker_exports_enabled);
321 }
322 
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325 	if (export->flags & TRACE_EXPORT_FUNCTION)
326 		static_branch_dec(&trace_function_exports_enabled);
327 
328 	if (export->flags & TRACE_EXPORT_EVENT)
329 		static_branch_dec(&trace_event_exports_enabled);
330 
331 	if (export->flags & TRACE_EXPORT_MARKER)
332 		static_branch_dec(&trace_marker_exports_enabled);
333 }
334 
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337 	struct trace_export *export;
338 
339 	preempt_disable_notrace();
340 
341 	export = rcu_dereference_raw_check(ftrace_exports_list);
342 	while (export) {
343 		trace_process_export(export, event, flag);
344 		export = rcu_dereference_raw_check(export->next);
345 	}
346 
347 	preempt_enable_notrace();
348 }
349 
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353 	rcu_assign_pointer(export->next, *list);
354 	/*
355 	 * We are adding the export to the list, but another
356 	 * CPU might be walking that list. We need to make sure
357 	 * the export->next pointer is valid before another CPU sees
358 	 * the export pointer included in the list.
359 	 */
360 	rcu_assign_pointer(*list, export);
361 }
362 
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366 	struct trace_export **p;
367 
368 	for (p = list; *p != NULL; p = &(*p)->next)
369 		if (*p == export)
370 			break;
371 
372 	if (*p != export)
373 		return -1;
374 
375 	rcu_assign_pointer(*p, (*p)->next);
376 
377 	return 0;
378 }
379 
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383 	ftrace_exports_enable(export);
384 
385 	add_trace_export(list, export);
386 }
387 
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391 	int ret;
392 
393 	ret = rm_trace_export(list, export);
394 	ftrace_exports_disable(export);
395 
396 	return ret;
397 }
398 
399 int register_ftrace_export(struct trace_export *export)
400 {
401 	if (WARN_ON_ONCE(!export->write))
402 		return -1;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	add_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413 
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416 	int ret;
417 
418 	mutex_lock(&ftrace_export_lock);
419 
420 	ret = rm_ftrace_export(&ftrace_exports_list, export);
421 
422 	mutex_unlock(&ftrace_export_lock);
423 
424 	return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
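/*
 * Minimal usage sketch for the export interface above. The callback
 * signature mirrors how trace_process_export() invokes ->write(); the
 * "my_export" names are purely illustrative:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		// forward the raw trace entry (@len bytes) out of band
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */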
427 
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS						\
430 	(FUNCTION_DEFAULT_FLAGS |					\
431 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
432 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
433 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
434 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
435 	 TRACE_ITER_HASH_PTR)
436 
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
439 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440 
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444 
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450 	.trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452 
453 LIST_HEAD(ftrace_trace_arrays);
454 
455 int trace_array_get(struct trace_array *this_tr)
456 {
457 	struct trace_array *tr;
458 	int ret = -ENODEV;
459 
460 	mutex_lock(&trace_types_lock);
461 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462 		if (tr == this_tr) {
463 			tr->ref++;
464 			ret = 0;
465 			break;
466 		}
467 	}
468 	mutex_unlock(&trace_types_lock);
469 
470 	return ret;
471 }
472 
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475 	WARN_ON(!this_tr->ref);
476 	this_tr->ref--;
477 }
478 
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490 	if (!this_tr)
491 		return;
492 
493 	mutex_lock(&trace_types_lock);
494 	__trace_array_put(this_tr);
495 	mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498 
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501 	int ret;
502 
503 	ret = security_locked_down(LOCKDOWN_TRACEFS);
504 	if (ret)
505 		return ret;
506 
507 	if (tracing_disabled)
508 		return -ENODEV;
509 
510 	if (tr && trace_array_get(tr) < 0)
511 		return -ENODEV;
512 
513 	return 0;
514 }
515 
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517 			      struct trace_buffer *buffer,
518 			      struct ring_buffer_event *event)
519 {
520 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521 	    !filter_match_preds(call->filter, rec)) {
522 		__trace_event_discard_commit(buffer, event);
523 		return 1;
524 	}
525 
526 	return 0;
527 }
528 
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539 	return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541 
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554 		       struct trace_pid_list *filtered_no_pids,
555 		       struct task_struct *task)
556 {
557 	/*
558 	 * If filtered_no_pids is not empty, and the task's pid is listed
559 	 * in filtered_no_pids, then return true.
560 	 * Otherwise, if filtered_pids is empty, that means we can
561 	 * trace all tasks. If it has content, then only trace pids
562 	 * within filtered_pids.
563 	 */
564 
565 	return (filtered_pids &&
566 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
567 		(filtered_no_pids &&
568 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570 
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584 				  struct task_struct *self,
585 				  struct task_struct *task)
586 {
587 	if (!pid_list)
588 		return;
589 
590 	/* For forks, we only add if the forking task is listed */
591 	if (self) {
592 		if (!trace_find_filtered_pid(pid_list, self->pid))
593 			return;
594 	}
595 
596 	/* "self" is set for forks, and NULL for exits */
597 	if (self)
598 		trace_pid_list_set(pid_list, task->pid);
599 	else
600 		trace_pid_list_clear(pid_list, task->pid);
601 }
602 
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617 	long pid = (unsigned long)v;
618 	unsigned int next;
619 
620 	(*pos)++;
621 
622 	/* pid already is +1 of the actual previous bit */
623 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
624 		return NULL;
625 
626 	pid = next;
627 
628 	/* Return pid + 1 to allow zero to be represented */
629 	return (void *)(pid + 1);
630 }
631 
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645 	unsigned long pid;
646 	unsigned int first;
647 	loff_t l = 0;
648 
649 	if (trace_pid_list_first(pid_list, &first) < 0)
650 		return NULL;
651 
652 	pid = first;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
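/*
 * The three helpers above are meant to back a seq_file interface. Callers
 * typically wrap start/next so the pid_list is looked up from the seq_file
 * itself; a sketch with hypothetical wrappers (my_pid_list, p_stop):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list(m), pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list(m), v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	// drop any locks/refs taken in p_start
 *		.show	= trace_pid_show,
 *	};
 */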
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always recreate a new array. The write is an all or nothing
698 	 * operation. Always create a new array when adding new pids by
699 	 * the user. If the operation fails, then the current list is
700 	 * not modified.
701 	 */
702 	pid_list = trace_pid_list_alloc();
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	if (filtered_pids) {
709 		/* copy the current bits to the new max */
710 		ret = trace_pid_list_first(filtered_pids, &pid);
711 		while (!ret) {
712 			trace_pid_list_set(pid_list, pid);
713 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714 			nr_pids++;
715 		}
716 	}
717 
718 	ret = 0;
719 	while (cnt > 0) {
720 
721 		pos = 0;
722 
723 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
724 		if (ret < 0)
725 			break;
726 
727 		read += ret;
728 		ubuf += ret;
729 		cnt -= ret;
730 
731 		if (!trace_parser_loaded(&parser))
732 			break;
733 
734 		ret = -EINVAL;
735 		if (kstrtoul(parser.buffer, 0, &val))
736 			break;
737 
738 		pid = (pid_t)val;
739 
740 		if (trace_pid_list_set(pid_list, pid) < 0) {
741 			ret = -1;
742 			break;
743 		}
744 		nr_pids++;
745 
746 		trace_parser_clear(&parser);
747 		ret = 0;
748 	}
749 	trace_parser_put(&parser);
750 
751 	if (ret < 0) {
752 		trace_pid_list_free(pid_list);
753 		return ret;
754 	}
755 
756 	if (!nr_pids) {
757 		/* Cleared the list of pids */
758 		trace_pid_list_free(pid_list);
759 		pid_list = NULL;
760 	}
761 
762 	*new_pid_list = pid_list;
763 
764 	return read;
765 }
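/*
 * The accepted input is a whitespace-separated list of pid numbers, as
 * written to e.g. the tracefs "set_event_pid" file (illustrative):
 *
 *	# echo 123 456 789 > /sys/kernel/tracing/set_event_pid
 *	# echo > /sys/kernel/tracing/set_event_pid	(clears the list)
 */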
766 
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 	u64 ts;
770 
771 	/* Early boot up does not have a buffer yet */
772 	if (!buf->buffer)
773 		return trace_clock_local();
774 
775 	ts = ring_buffer_time_stamp(buf->buffer);
776 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777 
778 	return ts;
779 }
780 
781 u64 ftrace_now(int cpu)
782 {
783 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785 
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", so it can be used in fast paths such
791  * as the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797 	/*
798 	 * For quick access (irqsoff uses this in fast path), just
799 	 * return the mirror variable of the state of the ring buffer.
800 	 * It's a little racy, but we don't really care.
801 	 */
802 	smp_rmb();
803 	return !global_trace.buffer_disabled;
804 }
805 
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If the dump on oops happens, it will be much appreciated
813  * to not have to wait for all that output. Anyway, this is
814  * configurable at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
817 
818 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer		*trace_types __read_mostly;
822 
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827 
828 /*
829  * serialize the access of the ring buffer
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (returned by ring_buffer_peek(), etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow another process to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not reader page) in ring buffer, and this page will be rewritten
839  *      by events producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to system.
842  *
843  * These primitives allow multiple processes to access different per-cpu
844  * ring buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849 
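/*
 * Typical reader-side pattern (a sketch): bracket the consumption of a
 * per-cpu buffer with trace_access_lock()/trace_access_unlock(), or pass
 * RING_BUFFER_ALL_CPUS to serialize against every per-cpu reader:
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from the @cpu ring buffer ...
 *	trace_access_unlock(cpu);
 */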
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 
854 static inline void trace_access_lock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		/* gain it for accessing the whole ring buffer. */
858 		down_write(&all_cpu_access_lock);
859 	} else {
860 		/* gain it for accessing a cpu ring buffer. */
861 
862 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 		down_read(&all_cpu_access_lock);
864 
865 		/* Secondly block other access to this @cpu ring buffer. */
866 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 	}
868 }
869 
870 static inline void trace_access_unlock(int cpu)
871 {
872 	if (cpu == RING_BUFFER_ALL_CPUS) {
873 		up_write(&all_cpu_access_lock);
874 	} else {
875 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 		up_read(&all_cpu_access_lock);
877 	}
878 }
879 
880 static inline void trace_access_lock_init(void)
881 {
882 	int cpu;
883 
884 	for_each_possible_cpu(cpu)
885 		mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887 
888 #else
889 
890 static DEFINE_MUTEX(access_lock);
891 
892 static inline void trace_access_lock(int cpu)
893 {
894 	(void)cpu;
895 	mutex_lock(&access_lock);
896 }
897 
898 static inline void trace_access_unlock(int cpu)
899 {
900 	(void)cpu;
901 	mutex_unlock(&access_lock);
902 }
903 
904 static inline void trace_access_lock_init(void)
905 {
906 }
907 
908 #endif
909 
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 				 unsigned int trace_ctx,
913 				 int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 				      struct trace_buffer *buffer,
916 				      unsigned int trace_ctx,
917 				      int skip, struct pt_regs *regs);
918 
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921 					unsigned int trace_ctx,
922 					int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926 				      struct trace_buffer *buffer,
927 				      unsigned long trace_ctx,
928 				      int skip, struct pt_regs *regs)
929 {
930 }
931 
932 #endif
933 
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 		  int type, unsigned int trace_ctx)
937 {
938 	struct trace_entry *ent = ring_buffer_event_data(event);
939 
940 	tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942 
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 			  int type,
946 			  unsigned long len,
947 			  unsigned int trace_ctx)
948 {
949 	struct ring_buffer_event *event;
950 
951 	event = ring_buffer_lock_reserve(buffer, len);
952 	if (event != NULL)
953 		trace_event_setup(event, type, trace_ctx);
954 
955 	return event;
956 }
957 
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 	if (tr->array_buffer.buffer)
961 		ring_buffer_record_on(tr->array_buffer.buffer);
962 	/*
963 	 * This flag is looked at when buffers haven't been allocated
964 	 * yet, or by some tracers (like irqsoff) that just want to
965 	 * know if the ring buffer has been disabled, but it can handle
966 	 * races where it gets disabled while we still do a record.
967 	 * As the check is in the fast path of the tracers, it is more
968 	 * important to be fast than accurate.
969 	 */
970 	tr->buffer_disabled = 0;
971 	/* Make the flag seen by readers */
972 	smp_wmb();
973 }
974 
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983 	tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986 
987 
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 	__this_cpu_write(trace_taskinfo_save, true);
992 
993 	/* If this is the temp buffer, we need to commit fully */
994 	if (this_cpu_read(trace_buffered_event) == event) {
995 		/* Length is in event->array[0] */
996 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 		/* Release the temp buffer */
998 		this_cpu_dec(trace_buffered_event_cnt);
999 		/* ring_buffer_unlock_commit() enables preemption */
1000 		preempt_enable_notrace();
1001 	} else
1002 		ring_buffer_unlock_commit(buffer, event);
1003 }
1004 
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:	   The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013 	struct ring_buffer_event *event;
1014 	struct trace_buffer *buffer;
1015 	struct print_entry *entry;
1016 	unsigned int trace_ctx;
1017 	int alloc;
1018 
1019 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020 		return 0;
1021 
1022 	if (unlikely(tracing_selftest_running || tracing_disabled))
1023 		return 0;
1024 
1025 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026 
1027 	trace_ctx = tracing_gen_ctx();
1028 	buffer = global_trace.array_buffer.buffer;
1029 	ring_buffer_nest_start(buffer);
1030 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031 					    trace_ctx);
1032 	if (!event) {
1033 		size = 0;
1034 		goto out;
1035 	}
1036 
1037 	entry = ring_buffer_event_data(event);
1038 	entry->ip = ip;
1039 
1040 	memcpy(&entry->buf, str, size);
1041 
1042 	/* Add a newline if necessary */
1043 	if (entry->buf[size - 1] != '\n') {
1044 		entry->buf[size] = '\n';
1045 		entry->buf[size + 1] = '\0';
1046 	} else
1047 		entry->buf[size] = '\0';
1048 
1049 	__buffer_unlock_commit(buffer, event);
1050 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052 	ring_buffer_nest_end(buffer);
1053 	return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
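/*
 * Callers normally do not use __trace_puts() directly but go through the
 * trace_puts() macro, which picks __trace_bputs() for compile-time constant
 * strings and falls back to __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */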
1056 
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:	   The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064 	struct ring_buffer_event *event;
1065 	struct trace_buffer *buffer;
1066 	struct bputs_entry *entry;
1067 	unsigned int trace_ctx;
1068 	int size = sizeof(struct bputs_entry);
1069 	int ret = 0;
1070 
1071 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072 		return 0;
1073 
1074 	if (unlikely(tracing_selftest_running || tracing_disabled))
1075 		return 0;
1076 
1077 	trace_ctx = tracing_gen_ctx();
1078 	buffer = global_trace.array_buffer.buffer;
1079 
1080 	ring_buffer_nest_start(buffer);
1081 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082 					    trace_ctx);
1083 	if (!event)
1084 		goto out;
1085 
1086 	entry = ring_buffer_event_data(event);
1087 	entry->ip			= ip;
1088 	entry->str			= str;
1089 
1090 	__buffer_unlock_commit(buffer, event);
1091 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 
1093 	ret = 1;
1094  out:
1095 	ring_buffer_nest_end(buffer);
1096 	return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099 
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102 					   void *cond_data)
1103 {
1104 	struct tracer *tracer = tr->current_trace;
1105 	unsigned long flags;
1106 
1107 	if (in_nmi()) {
1108 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1110 		return;
1111 	}
1112 
1113 	if (!tr->allocated_snapshot) {
1114 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115 		internal_trace_puts("*** stopping trace here!   ***\n");
1116 		tracing_off();
1117 		return;
1118 	}
1119 
1120 	/* Note, snapshot can not be used when the tracer uses it */
1121 	if (tracer->use_max_tr) {
1122 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 		return;
1125 	}
1126 
1127 	local_irq_save(flags);
1128 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1129 	local_irq_restore(flags);
1130 }
1131 
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134 	tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136 
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing,
1149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153 	struct trace_array *tr = &global_trace;
1154 
1155 	tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
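/*
 * Typical in-kernel use (a sketch; something_interesting() is illustrative):
 * allocate the snapshot buffer once from a context that may sleep, then
 * trigger snapshots when the interesting condition hits:
 *
 *	tracing_alloc_snapshot();	// or: echo 1 > tracing/snapshot
 *	...
 *	if (something_interesting())
 *		tracing_snapshot();
 */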
1158 
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:		The tracing instance to snapshot
1162  * @cond_data:	The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174 	tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177 
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:		The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already taken.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194 	void *cond_data = NULL;
1195 
1196 	arch_spin_lock(&tr->max_lock);
1197 
1198 	if (tr->cond_snapshot)
1199 		cond_data = tr->cond_snapshot->cond_data;
1200 
1201 	arch_spin_unlock(&tr->max_lock);
1202 
1203 	return cond_data;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206 
1207 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1208 					struct array_buffer *size_buf, int cpu_id);
1209 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1210 
1211 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 {
1213 	int ret;
1214 
1215 	if (!tr->allocated_snapshot) {
1216 
1217 		/* allocate spare buffer */
1218 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1219 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220 		if (ret < 0)
1221 			return ret;
1222 
1223 		tr->allocated_snapshot = true;
1224 	}
1225 
1226 	return 0;
1227 }
1228 
1229 static void free_snapshot(struct trace_array *tr)
1230 {
1231 	/*
1232 	 * We don't free the ring buffer; instead, we resize it because
1233 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1234 	 * we want to preserve it.
1235 	 */
1236 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1237 	set_buffer_entries(&tr->max_buffer, 1);
1238 	tracing_reset_online_cpus(&tr->max_buffer);
1239 	tr->allocated_snapshot = false;
1240 }
1241 
1242 /**
1243  * tracing_alloc_snapshot - allocate snapshot buffer.
1244  *
1245  * This only allocates the snapshot buffer if it isn't already
1246  * allocated - it doesn't also take a snapshot.
1247  *
1248  * This is meant to be used in cases where the snapshot buffer needs
1249  * to be set up for events that can't sleep but need to be able to
1250  * trigger a snapshot.
1251  */
1252 int tracing_alloc_snapshot(void)
1253 {
1254 	struct trace_array *tr = &global_trace;
1255 	int ret;
1256 
1257 	ret = tracing_alloc_snapshot_instance(tr);
1258 	WARN_ON(ret < 0);
1259 
1260 	return ret;
1261 }
1262 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1263 
1264 /**
1265  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1266  *
1267  * This is similar to tracing_snapshot(), but it will allocate the
1268  * snapshot buffer if it isn't already allocated. Use this only
1269  * where it is safe to sleep, as the allocation may sleep.
1270  *
1271  * This causes a swap between the snapshot buffer and the current live
1272  * tracing buffer. You can use this to take snapshots of the live
1273  * trace when some condition is triggered, but continue to trace.
1274  */
1275 void tracing_snapshot_alloc(void)
1276 {
1277 	int ret;
1278 
1279 	ret = tracing_alloc_snapshot();
1280 	if (ret < 0)
1281 		return;
1282 
1283 	tracing_snapshot();
1284 }
1285 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1286 
1287 /**
1288  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1289  * @tr:		The tracing instance
1290  * @cond_data:	User data to associate with the snapshot
1291  * @update:	Implementation of the cond_snapshot update function
1292  *
1293  * Check whether the conditional snapshot for the given instance has
1294  * already been enabled, or if the current tracer is already using a
1295  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1296  * save the cond_data and update function inside.
1297  *
1298  * Returns 0 if successful, error otherwise.
1299  */
1300 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1301 				 cond_update_fn_t update)
1302 {
1303 	struct cond_snapshot *cond_snapshot;
1304 	int ret = 0;
1305 
1306 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307 	if (!cond_snapshot)
1308 		return -ENOMEM;
1309 
1310 	cond_snapshot->cond_data = cond_data;
1311 	cond_snapshot->update = update;
1312 
1313 	mutex_lock(&trace_types_lock);
1314 
1315 	ret = tracing_alloc_snapshot_instance(tr);
1316 	if (ret)
1317 		goto fail_unlock;
1318 
1319 	if (tr->current_trace->use_max_tr) {
1320 		ret = -EBUSY;
1321 		goto fail_unlock;
1322 	}
1323 
1324 	/*
1325 	 * The cond_snapshot can only change to NULL without the
1326 	 * trace_types_lock. We don't care if we race with it going
1327 	 * to NULL, but we want to make sure that it's not set to
1328 	 * something other than NULL when we get here, which we can
1329 	 * do safely with only holding the trace_types_lock and not
1330 	 * having to take the max_lock.
1331 	 */
1332 	if (tr->cond_snapshot) {
1333 		ret = -EBUSY;
1334 		goto fail_unlock;
1335 	}
1336 
1337 	arch_spin_lock(&tr->max_lock);
1338 	tr->cond_snapshot = cond_snapshot;
1339 	arch_spin_unlock(&tr->max_lock);
1340 
1341 	mutex_unlock(&trace_types_lock);
1342 
1343 	return ret;
1344 
1345  fail_unlock:
1346 	mutex_unlock(&trace_types_lock);
1347 	kfree(cond_snapshot);
1348 	return ret;
1349 }
1350 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
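/*
 * Sketch of a conditional-snapshot user. The update callback follows the
 * cond_update_fn_t prototype (bool (*)(struct trace_array *, void *));
 * "my_state"/"my_update" are illustrative only:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->value > s->threshold;	// snapshot only when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state_instance, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state_instance);
 */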
1351 
1352 /**
1353  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1354  * @tr:		The tracing instance
1355  *
1356  * Check whether the conditional snapshot for the given instance is
1357  * enabled; if so, free the cond_snapshot associated with it,
1358  * otherwise return -EINVAL.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 {
1364 	int ret = 0;
1365 
1366 	arch_spin_lock(&tr->max_lock);
1367 
1368 	if (!tr->cond_snapshot)
1369 		ret = -EINVAL;
1370 	else {
1371 		kfree(tr->cond_snapshot);
1372 		tr->cond_snapshot = NULL;
1373 	}
1374 
1375 	arch_spin_unlock(&tr->max_lock);
1376 
1377 	return ret;
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1380 #else
1381 void tracing_snapshot(void)
1382 {
1383 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot);
1386 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1387 {
1388 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1391 int tracing_alloc_snapshot(void)
1392 {
1393 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1394 	return -ENODEV;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1397 void tracing_snapshot_alloc(void)
1398 {
1399 	/* Give warning */
1400 	tracing_snapshot();
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1403 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 {
1405 	return NULL;
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1408 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 {
1410 	return -ENODEV;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 {
1415 	return false;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1418 #endif /* CONFIG_TRACER_SNAPSHOT */
1419 
1420 void tracer_tracing_off(struct trace_array *tr)
1421 {
1422 	if (tr->array_buffer.buffer)
1423 		ring_buffer_record_off(tr->array_buffer.buffer);
1424 	/*
1425 	 * This flag is looked at when buffers haven't been allocated
1426 	 * yet, or by some tracers (like irqsoff) that just want to
1427 	 * know if the ring buffer has been disabled, but it can handle
1428 	 * races where it gets disabled while we still do a record.
1429 	 * As the check is in the fast path of the tracers, it is more
1430 	 * important to be fast than accurate.
1431 	 */
1432 	tr->buffer_disabled = 1;
1433 	/* Make the flag seen by readers */
1434 	smp_wmb();
1435 }
1436 
1437 /**
1438  * tracing_off - turn off tracing buffers
1439  *
1440  * This function stops the tracing buffers from recording data.
1441  * It does not disable any overhead the tracers themselves may
1442  * be causing. This function simply causes all recording to
1443  * the ring buffers to fail.
1444  */
1445 void tracing_off(void)
1446 {
1447 	tracer_tracing_off(&global_trace);
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_off);
1450 
1451 void disable_trace_on_warning(void)
1452 {
1453 	if (__disable_trace_on_warning) {
1454 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1455 			"Disabling tracing due to warning\n");
1456 		tracing_off();
1457 	}
1458 }
1459 
1460 /**
1461  * tracer_tracing_is_on - show real state of ring buffer enabled
1462  * @tr : the trace array to know if ring buffer is enabled
1463  *
1464  * Shows real state of the ring buffer if it is enabled or not.
1465  */
1466 bool tracer_tracing_is_on(struct trace_array *tr)
1467 {
1468 	if (tr->array_buffer.buffer)
1469 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1470 	return !tr->buffer_disabled;
1471 }
1472 
1473 /**
1474  * tracing_is_on - show state of ring buffers enabled
1475  */
1476 int tracing_is_on(void)
1477 {
1478 	return tracer_tracing_is_on(&global_trace);
1479 }
1480 EXPORT_SYMBOL_GPL(tracing_is_on);
1481 
1482 static int __init set_buf_size(char *str)
1483 {
1484 	unsigned long buf_size;
1485 
1486 	if (!str)
1487 		return 0;
1488 	buf_size = memparse(str, &str);
1489 	/*
1490 	 * nr_entries can not be zero and the startup
1491 	 * tests require some buffer space. Therefore
1492 	 * ensure we have at least 4096 bytes of buffer.
1493 	 */
1494 	trace_buf_size = max(4096UL, buf_size);
1495 	return 1;
1496 }
1497 __setup("trace_buf_size=", set_buf_size);
1498 
1499 static int __init set_tracing_thresh(char *str)
1500 {
1501 	unsigned long threshold;
1502 	int ret;
1503 
1504 	if (!str)
1505 		return 0;
1506 	ret = kstrtoul(str, 0, &threshold);
1507 	if (ret < 0)
1508 		return 0;
1509 	tracing_thresh = threshold * 1000;
1510 	return 1;
1511 }
1512 __setup("tracing_thresh=", set_tracing_thresh);
1513 
1514 unsigned long nsecs_to_usecs(unsigned long nsecs)
1515 {
1516 	return nsecs / 1000;
1517 }
1518 
1519 /*
1520  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1521  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1522  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1523  * of strings in the order that the evals (enum) were defined.
1524  */
1525 #undef C
1526 #define C(a, b) b
1527 
1528 /* These must match the bit positions in trace_iterator_flags */
1529 static const char *trace_options[] = {
1530 	TRACE_FLAGS
1531 	NULL
1532 };
1533 
1534 static struct {
1535 	u64 (*func)(void);
1536 	const char *name;
1537 	int in_ns;		/* is this clock in nanoseconds? */
1538 } trace_clocks[] = {
1539 	{ trace_clock_local,		"local",	1 },
1540 	{ trace_clock_global,		"global",	1 },
1541 	{ trace_clock_counter,		"counter",	0 },
1542 	{ trace_clock_jiffies,		"uptime",	0 },
1543 	{ trace_clock,			"perf",		1 },
1544 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1545 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1546 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1547 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1548 	ARCH_TRACE_CLOCKS
1549 };
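/*
 * A clock from the table above is selected either at boot time, e.g.
 * "trace_clock=global" on the kernel command line (see
 * set_trace_boot_clock() above), or at run time through the tracefs
 * "trace_clock" file:
 *
 *	# echo mono > /sys/kernel/tracing/trace_clock
 */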
1550 
1551 bool trace_clock_in_ns(struct trace_array *tr)
1552 {
1553 	if (trace_clocks[tr->clock_id].in_ns)
1554 		return true;
1555 
1556 	return false;
1557 }
1558 
1559 /*
1560  * trace_parser_get_init - gets the buffer for trace parser
1561  */
1562 int trace_parser_get_init(struct trace_parser *parser, int size)
1563 {
1564 	memset(parser, 0, sizeof(*parser));
1565 
1566 	parser->buffer = kmalloc(size, GFP_KERNEL);
1567 	if (!parser->buffer)
1568 		return 1;
1569 
1570 	parser->size = size;
1571 	return 0;
1572 }
1573 
1574 /*
1575  * trace_parser_put - frees the buffer for trace parser
1576  */
1577 void trace_parser_put(struct trace_parser *parser)
1578 {
1579 	kfree(parser->buffer);
1580 	parser->buffer = NULL;
1581 }
1582 
1583 /*
1584  * trace_get_user - reads the user input string separated by space
1585  * (matched by isspace(ch))
1586  *
1587  * For each string found the 'struct trace_parser' is updated,
1588  * and the function returns.
1589  *
1590  * Returns number of bytes read.
1591  *
1592  * See kernel/trace/trace.h for 'struct trace_parser' details.
1593  */
1594 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1595 	size_t cnt, loff_t *ppos)
1596 {
1597 	char ch;
1598 	size_t read = 0;
1599 	ssize_t ret;
1600 
1601 	if (!*ppos)
1602 		trace_parser_clear(parser);
1603 
1604 	ret = get_user(ch, ubuf++);
1605 	if (ret)
1606 		goto out;
1607 
1608 	read++;
1609 	cnt--;
1610 
1611 	/*
1612 	 * The parser is not finished with the last write,
1613 	 * continue reading the user input without skipping spaces.
1614 	 */
1615 	if (!parser->cont) {
1616 		/* skip white space */
1617 		while (cnt && isspace(ch)) {
1618 			ret = get_user(ch, ubuf++);
1619 			if (ret)
1620 				goto out;
1621 			read++;
1622 			cnt--;
1623 		}
1624 
1625 		parser->idx = 0;
1626 
1627 		/* only spaces were written */
1628 		if (isspace(ch) || !ch) {
1629 			*ppos += read;
1630 			ret = read;
1631 			goto out;
1632 		}
1633 	}
1634 
1635 	/* read the non-space input */
1636 	while (cnt && !isspace(ch) && ch) {
1637 		if (parser->idx < parser->size - 1)
1638 			parser->buffer[parser->idx++] = ch;
1639 		else {
1640 			ret = -EINVAL;
1641 			goto out;
1642 		}
1643 		ret = get_user(ch, ubuf++);
1644 		if (ret)
1645 			goto out;
1646 		read++;
1647 		cnt--;
1648 	}
1649 
1650 	/* We either got finished input or we have to wait for another call. */
1651 	if (isspace(ch) || !ch) {
1652 		parser->buffer[parser->idx] = 0;
1653 		parser->cont = false;
1654 	} else if (parser->idx < parser->size - 1) {
1655 		parser->cont = true;
1656 		parser->buffer[parser->idx++] = ch;
1657 		/* Make sure the parsed string always terminates with '\0'. */
1658 		parser->buffer[parser->idx] = 0;
1659 	} else {
1660 		ret = -EINVAL;
1661 		goto out;
1662 	}
1663 
1664 	*ppos += read;
1665 	ret = read;
1666 
1667 out:
1668 	return ret;
1669 }
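/*
 * See trace_pid_write() above for a typical read loop: call trace_get_user()
 * repeatedly, check trace_parser_loaded(), consume parser.buffer, then
 * trace_parser_clear() before fetching the next token.
 */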
1670 
1671 /* TODO add a seq_buf_to_buffer() */
1672 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1673 {
1674 	int len;
1675 
1676 	if (trace_seq_used(s) <= s->seq.readpos)
1677 		return -EBUSY;
1678 
1679 	len = trace_seq_used(s) - s->seq.readpos;
1680 	if (cnt > len)
1681 		cnt = len;
1682 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1683 
1684 	s->seq.readpos += cnt;
1685 	return cnt;
1686 }
1687 
1688 unsigned long __read_mostly	tracing_thresh;
1689 static const struct file_operations tracing_max_lat_fops;
1690 
1691 #ifdef LATENCY_FS_NOTIFY
1692 
1693 static struct workqueue_struct *fsnotify_wq;
1694 
1695 static void latency_fsnotify_workfn(struct work_struct *work)
1696 {
1697 	struct trace_array *tr = container_of(work, struct trace_array,
1698 					      fsnotify_work);
1699 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1700 }
1701 
1702 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1703 {
1704 	struct trace_array *tr = container_of(iwork, struct trace_array,
1705 					      fsnotify_irqwork);
1706 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1707 }
1708 
1709 static void trace_create_maxlat_file(struct trace_array *tr,
1710 				     struct dentry *d_tracer)
1711 {
1712 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1713 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1714 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1715 					      TRACE_MODE_WRITE,
1716 					      d_tracer, &tr->max_latency,
1717 					      &tracing_max_lat_fops);
1718 }
1719 
1720 __init static int latency_fsnotify_init(void)
1721 {
1722 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1723 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1724 	if (!fsnotify_wq) {
1725 		pr_err("Unable to allocate tr_max_lat_wq\n");
1726 		return -ENOMEM;
1727 	}
1728 	return 0;
1729 }
1730 
1731 late_initcall_sync(latency_fsnotify_init);
1732 
1733 void latency_fsnotify(struct trace_array *tr)
1734 {
1735 	if (!fsnotify_wq)
1736 		return;
1737 	/*
1738 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1739 	 * possible that we are called from __schedule() or do_idle(), which
1740 	 * could cause a deadlock.
1741 	 */
1742 	irq_work_queue(&tr->fsnotify_irqwork);
1743 }
1744 
1745 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1746 	|| defined(CONFIG_OSNOISE_TRACER)
1747 
1748 #define trace_create_maxlat_file(tr, d_tracer)				\
1749 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1750 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1751 
1752 #else
1753 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1754 #endif
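
/*
 * Illustrative sketch (not part of the original file) of the two-stage
 * deferral used by latency_fsnotify() above: code that may run from
 * scheduler or idle context queues an irq_work, whose handler then queues
 * regular work that is allowed to sleep.  All "example_" names are
 * hypothetical.
 *
 *	static void example_workfn(struct work_struct *work)
 *	{
 *		// May sleep and take blocking locks here.
 *	}
 *	static DECLARE_WORK(example_work, example_workfn);
 *
 *	static void example_irq_workfn(struct irq_work *iwork)
 *	{
 *		queue_work(system_unbound_wq, &example_work);
 *	}
 *
 *	// From atomic context: irq_work_queue(&example_irq_work), where
 *	// example_irq_work was set up with init_irq_work(..., example_irq_workfn).
 */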
1755 
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 /*
1758  * Copy the new maximum trace into the separate maximum-trace
1759  * structure. (this way the maximum trace is permanently saved,
1760  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1761  */
1762 static void
1763 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1764 {
1765 	struct array_buffer *trace_buf = &tr->array_buffer;
1766 	struct array_buffer *max_buf = &tr->max_buffer;
1767 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1768 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1769 
1770 	max_buf->cpu = cpu;
1771 	max_buf->time_start = data->preempt_timestamp;
1772 
1773 	max_data->saved_latency = tr->max_latency;
1774 	max_data->critical_start = data->critical_start;
1775 	max_data->critical_end = data->critical_end;
1776 
1777 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1778 	max_data->pid = tsk->pid;
1779 	/*
1780 	 * If tsk == current, then use current_uid(), as that does not use
1781 	 * RCU. The irq tracer can be called out of RCU scope.
1782 	 */
1783 	if (tsk == current)
1784 		max_data->uid = current_uid();
1785 	else
1786 		max_data->uid = task_uid(tsk);
1787 
1788 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1789 	max_data->policy = tsk->policy;
1790 	max_data->rt_priority = tsk->rt_priority;
1791 
1792 	/* Record this task's comm */
1793 	tracing_record_cmdline(tsk);
1794 	latency_fsnotify(tr);
1795 }
1796 
1797 /**
1798  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1799  * @tr: tracer
1800  * @tsk: the task with the latency
1801  * @cpu: The cpu that initiated the trace.
1802  * @cond_data: User data associated with a conditional snapshot
1803  *
1804  * Flip the buffers between the @tr and the max_tr and record information
1805  * about which task was the cause of this latency.
1806  */
1807 void
1808 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809 	      void *cond_data)
1810 {
1811 	if (tr->stop_count)
1812 		return;
1813 
1814 	WARN_ON_ONCE(!irqs_disabled());
1815 
1816 	if (!tr->allocated_snapshot) {
1817 		/* Only the nop tracer should hit this when disabling */
1818 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1819 		return;
1820 	}
1821 
1822 	arch_spin_lock(&tr->max_lock);
1823 
1824 	/* Inherit the recordable setting from array_buffer */
1825 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1826 		ring_buffer_record_on(tr->max_buffer.buffer);
1827 	else
1828 		ring_buffer_record_off(tr->max_buffer.buffer);
1829 
1830 #ifdef CONFIG_TRACER_SNAPSHOT
1831 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1832 		goto out_unlock;
1833 #endif
1834 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1835 
1836 	__update_max_tr(tr, tsk, cpu);
1837 
1838  out_unlock:
1839 	arch_spin_unlock(&tr->max_lock);
1840 }
1841 
1842 /**
1843  * update_max_tr_single - only copy one trace over, and reset the rest
1844  * @tr: tracer
1845  * @tsk: task with the latency
1846  * @cpu: the cpu of the buffer to copy.
1847  *
1848  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1849  */
1850 void
1851 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1852 {
1853 	int ret;
1854 
1855 	if (tr->stop_count)
1856 		return;
1857 
1858 	WARN_ON_ONCE(!irqs_disabled());
1859 	if (!tr->allocated_snapshot) {
1860 		/* Only the nop tracer should hit this when disabling */
1861 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1862 		return;
1863 	}
1864 
1865 	arch_spin_lock(&tr->max_lock);
1866 
1867 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1868 
1869 	if (ret == -EBUSY) {
1870 		/*
1871 		 * We failed to swap the buffer due to a commit taking
1872 		 * place on this CPU. We fail to record, but we reset
1873 		 * the max trace buffer (no one writes directly to it)
1874 		 * and flag that it failed.
1875 		 */
1876 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1877 			"Failed to swap buffers due to commit in progress\n");
1878 	}
1879 
1880 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1881 
1882 	__update_max_tr(tr, tsk, cpu);
1883 	arch_spin_unlock(&tr->max_lock);
1884 }
1885 #endif /* CONFIG_TRACER_MAX_TRACE */
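
/*
 * Illustrative sketch (not part of the original file): a latency tracer
 * calls update_max_tr() when it sees a new worst-case latency, with
 * interrupts disabled and after raising tr->max_latency.  This is a
 * simplified, hypothetical version of what tracers such as irqsoff do.
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */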
1886 
1887 static int wait_on_pipe(struct trace_iterator *iter, int full)
1888 {
1889 	/* Iterators are static, they should be filled or empty */
1890 	if (trace_buffer_iter(iter, iter->cpu_file))
1891 		return 0;
1892 
1893 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1894 				full);
1895 }
1896 
1897 #ifdef CONFIG_FTRACE_STARTUP_TEST
1898 static bool selftests_can_run;
1899 
1900 struct trace_selftests {
1901 	struct list_head		list;
1902 	struct tracer			*type;
1903 };
1904 
1905 static LIST_HEAD(postponed_selftests);
1906 
1907 static int save_selftest(struct tracer *type)
1908 {
1909 	struct trace_selftests *selftest;
1910 
1911 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1912 	if (!selftest)
1913 		return -ENOMEM;
1914 
1915 	selftest->type = type;
1916 	list_add(&selftest->list, &postponed_selftests);
1917 	return 0;
1918 }
1919 
1920 static int run_tracer_selftest(struct tracer *type)
1921 {
1922 	struct trace_array *tr = &global_trace;
1923 	struct tracer *saved_tracer = tr->current_trace;
1924 	int ret;
1925 
1926 	if (!type->selftest || tracing_selftest_disabled)
1927 		return 0;
1928 
1929 	/*
1930 	 * If a tracer registers early in boot up (before scheduling is
1931 	 * initialized and such), then do not run its selftests yet.
1932 	 * Instead, run it a little later in the boot process.
1933 	 */
1934 	if (!selftests_can_run)
1935 		return save_selftest(type);
1936 
1937 	if (!tracing_is_on()) {
1938 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1939 			type->name);
1940 		return 0;
1941 	}
1942 
1943 	/*
1944 	 * Run a selftest on this tracer.
1945 	 * Here we reset the trace buffer, and set the current
1946 	 * tracer to be this tracer. The tracer can then run some
1947 	 * internal tracing to verify that everything is in order.
1948 	 * If we fail, we do not register this tracer.
1949 	 */
1950 	tracing_reset_online_cpus(&tr->array_buffer);
1951 
1952 	tr->current_trace = type;
1953 
1954 #ifdef CONFIG_TRACER_MAX_TRACE
1955 	if (type->use_max_tr) {
1956 		/* If we expanded the buffers, make sure the max is expanded too */
1957 		if (ring_buffer_expanded)
1958 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1959 					   RING_BUFFER_ALL_CPUS);
1960 		tr->allocated_snapshot = true;
1961 	}
1962 #endif
1963 
1964 	/* the test is responsible for initializing and enabling */
1965 	pr_info("Testing tracer %s: ", type->name);
1966 	ret = type->selftest(type, tr);
1967 	/* the test is responsible for resetting too */
1968 	tr->current_trace = saved_tracer;
1969 	if (ret) {
1970 		printk(KERN_CONT "FAILED!\n");
1971 		/* Add the warning after printing 'FAILED' */
1972 		WARN_ON(1);
1973 		return -1;
1974 	}
1975 	/* Only reset on passing, to avoid touching corrupted buffers */
1976 	tracing_reset_online_cpus(&tr->array_buffer);
1977 
1978 #ifdef CONFIG_TRACER_MAX_TRACE
1979 	if (type->use_max_tr) {
1980 		tr->allocated_snapshot = false;
1981 
1982 		/* Shrink the max buffer again */
1983 		if (ring_buffer_expanded)
1984 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1985 					   RING_BUFFER_ALL_CPUS);
1986 	}
1987 #endif
1988 
1989 	printk(KERN_CONT "PASSED\n");
1990 	return 0;
1991 }
1992 
1993 static __init int init_trace_selftests(void)
1994 {
1995 	struct trace_selftests *p, *n;
1996 	struct tracer *t, **last;
1997 	int ret;
1998 
1999 	selftests_can_run = true;
2000 
2001 	mutex_lock(&trace_types_lock);
2002 
2003 	if (list_empty(&postponed_selftests))
2004 		goto out;
2005 
2006 	pr_info("Running postponed tracer tests:\n");
2007 
2008 	tracing_selftest_running = true;
2009 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2010 		/* This loop can take minutes when sanitizers are enabled, so
2011 		 * let's make sure we allow RCU processing.
2012 		 */
2013 		cond_resched();
2014 		ret = run_tracer_selftest(p->type);
2015 		/* If the test fails, then warn and remove from available_tracers */
2016 		if (ret < 0) {
2017 			WARN(1, "tracer: %s failed selftest, disabling\n",
2018 			     p->type->name);
2019 			last = &trace_types;
2020 			for (t = trace_types; t; t = t->next) {
2021 				if (t == p->type) {
2022 					*last = t->next;
2023 					break;
2024 				}
2025 				last = &t->next;
2026 			}
2027 		}
2028 		list_del(&p->list);
2029 		kfree(p);
2030 	}
2031 	tracing_selftest_running = false;
2032 
2033  out:
2034 	mutex_unlock(&trace_types_lock);
2035 
2036 	return 0;
2037 }
2038 core_initcall(init_trace_selftests);
2039 #else
2040 static inline int run_tracer_selftest(struct tracer *type)
2041 {
2042 	return 0;
2043 }
2044 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2045 
2046 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2047 
2048 static void __init apply_trace_boot_options(void);
2049 
2050 /**
2051  * register_tracer - register a tracer with the ftrace system.
2052  * @type: the plugin for the tracer
2053  *
2054  * Register a new plugin tracer.
2055  */
2056 int __init register_tracer(struct tracer *type)
2057 {
2058 	struct tracer *t;
2059 	int ret = 0;
2060 
2061 	if (!type->name) {
2062 		pr_info("Tracer must have a name\n");
2063 		return -1;
2064 	}
2065 
2066 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2067 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2068 		return -1;
2069 	}
2070 
2071 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2072 		pr_warn("Can not register tracer %s due to lockdown\n",
2073 			   type->name);
2074 		return -EPERM;
2075 	}
2076 
2077 	mutex_lock(&trace_types_lock);
2078 
2079 	tracing_selftest_running = true;
2080 
2081 	for (t = trace_types; t; t = t->next) {
2082 		if (strcmp(type->name, t->name) == 0) {
2083 			/* already found */
2084 			pr_info("Tracer %s already registered\n",
2085 				type->name);
2086 			ret = -1;
2087 			goto out;
2088 		}
2089 	}
2090 
2091 	if (!type->set_flag)
2092 		type->set_flag = &dummy_set_flag;
2093 	if (!type->flags) {
2094 		/* Allocate a dummy tracer_flags */
2095 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2096 		if (!type->flags) {
2097 			ret = -ENOMEM;
2098 			goto out;
2099 		}
2100 		type->flags->val = 0;
2101 		type->flags->opts = dummy_tracer_opt;
2102 	} else
2103 		if (!type->flags->opts)
2104 			type->flags->opts = dummy_tracer_opt;
2105 
2106 	/* store the tracer for __set_tracer_option */
2107 	type->flags->trace = type;
2108 
2109 	ret = run_tracer_selftest(type);
2110 	if (ret < 0)
2111 		goto out;
2112 
2113 	type->next = trace_types;
2114 	trace_types = type;
2115 	add_tracer_options(&global_trace, type);
2116 
2117  out:
2118 	tracing_selftest_running = false;
2119 	mutex_unlock(&trace_types_lock);
2120 
2121 	if (ret || !default_bootup_tracer)
2122 		goto out_unlock;
2123 
2124 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2125 		goto out_unlock;
2126 
2127 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2128 	/* Do we want this tracer to start on bootup? */
2129 	tracing_set_tracer(&global_trace, type->name);
2130 	default_bootup_tracer = NULL;
2131 
2132 	apply_trace_boot_options();
2133 
2134 	/* Disable other selftests, since this tracer will break them. */
2135 	disable_tracing_selftest("running a tracer");
2136 
2137  out_unlock:
2138 	return ret;
2139 }
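
/*
 * Illustrative sketch (not part of the original file): a minimal tracer
 * plugin needs little more than a name plus init/reset callbacks before
 * handing itself to register_tracer() from an __init function.  The
 * "example" tracer below is hypothetical.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */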
2140 
2141 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2142 {
2143 	struct trace_buffer *buffer = buf->buffer;
2144 
2145 	if (!buffer)
2146 		return;
2147 
2148 	ring_buffer_record_disable(buffer);
2149 
2150 	/* Make sure all commits have finished */
2151 	synchronize_rcu();
2152 	ring_buffer_reset_cpu(buffer, cpu);
2153 
2154 	ring_buffer_record_enable(buffer);
2155 }
2156 
2157 void tracing_reset_online_cpus(struct array_buffer *buf)
2158 {
2159 	struct trace_buffer *buffer = buf->buffer;
2160 
2161 	if (!buffer)
2162 		return;
2163 
2164 	ring_buffer_record_disable(buffer);
2165 
2166 	/* Make sure all commits have finished */
2167 	synchronize_rcu();
2168 
2169 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2170 
2171 	ring_buffer_reset_online_cpus(buffer);
2172 
2173 	ring_buffer_record_enable(buffer);
2174 }
2175 
2176 /* Must have trace_types_lock held */
2177 void tracing_reset_all_online_cpus(void)
2178 {
2179 	struct trace_array *tr;
2180 
2181 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2182 		if (!tr->clear_trace)
2183 			continue;
2184 		tr->clear_trace = false;
2185 		tracing_reset_online_cpus(&tr->array_buffer);
2186 #ifdef CONFIG_TRACER_MAX_TRACE
2187 		tracing_reset_online_cpus(&tr->max_buffer);
2188 #endif
2189 	}
2190 }
2191 
2192 /*
2193  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2194  * is the tgid last observed corresponding to pid=i.
2195  */
2196 static int *tgid_map;
2197 
2198 /* The maximum valid index into tgid_map. */
2199 static size_t tgid_map_max;
2200 
2201 #define SAVED_CMDLINES_DEFAULT 128
2202 #define NO_CMDLINE_MAP UINT_MAX
2203 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2204 struct saved_cmdlines_buffer {
2205 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2206 	unsigned *map_cmdline_to_pid;
2207 	unsigned cmdline_num;
2208 	int cmdline_idx;
2209 	char *saved_cmdlines;
2210 };
2211 static struct saved_cmdlines_buffer *savedcmd;
2212 
2213 static inline char *get_saved_cmdlines(int idx)
2214 {
2215 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2216 }
2217 
2218 static inline void set_cmdline(int idx, const char *cmdline)
2219 {
2220 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2221 }
2222 
2223 static int allocate_cmdlines_buffer(unsigned int val,
2224 				    struct saved_cmdlines_buffer *s)
2225 {
2226 	s->map_cmdline_to_pid = kmalloc_array(val,
2227 					      sizeof(*s->map_cmdline_to_pid),
2228 					      GFP_KERNEL);
2229 	if (!s->map_cmdline_to_pid)
2230 		return -ENOMEM;
2231 
2232 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2233 	if (!s->saved_cmdlines) {
2234 		kfree(s->map_cmdline_to_pid);
2235 		return -ENOMEM;
2236 	}
2237 
2238 	s->cmdline_idx = 0;
2239 	s->cmdline_num = val;
2240 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2241 	       sizeof(s->map_pid_to_cmdline));
2242 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2243 	       val * sizeof(*s->map_cmdline_to_pid));
2244 
2245 	return 0;
2246 }
2247 
2248 static int trace_create_savedcmd(void)
2249 {
2250 	int ret;
2251 
2252 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2253 	if (!savedcmd)
2254 		return -ENOMEM;
2255 
2256 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2257 	if (ret < 0) {
2258 		kfree(savedcmd);
2259 		savedcmd = NULL;
2260 		return -ENOMEM;
2261 	}
2262 
2263 	return 0;
2264 }
2265 
2266 int is_tracing_stopped(void)
2267 {
2268 	return global_trace.stop_count;
2269 }
2270 
2271 /**
2272  * tracing_start - quick start of the tracer
2273  *
2274  * If tracing is enabled but was stopped by tracing_stop,
2275  * this will start the tracer back up.
2276  */
2277 void tracing_start(void)
2278 {
2279 	struct trace_buffer *buffer;
2280 	unsigned long flags;
2281 
2282 	if (tracing_disabled)
2283 		return;
2284 
2285 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2286 	if (--global_trace.stop_count) {
2287 		if (global_trace.stop_count < 0) {
2288 			/* Someone screwed up their debugging */
2289 			WARN_ON_ONCE(1);
2290 			global_trace.stop_count = 0;
2291 		}
2292 		goto out;
2293 	}
2294 
2295 	/* Prevent the buffers from switching */
2296 	arch_spin_lock(&global_trace.max_lock);
2297 
2298 	buffer = global_trace.array_buffer.buffer;
2299 	if (buffer)
2300 		ring_buffer_record_enable(buffer);
2301 
2302 #ifdef CONFIG_TRACER_MAX_TRACE
2303 	buffer = global_trace.max_buffer.buffer;
2304 	if (buffer)
2305 		ring_buffer_record_enable(buffer);
2306 #endif
2307 
2308 	arch_spin_unlock(&global_trace.max_lock);
2309 
2310  out:
2311 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2312 }
2313 
2314 static void tracing_start_tr(struct trace_array *tr)
2315 {
2316 	struct trace_buffer *buffer;
2317 	unsigned long flags;
2318 
2319 	if (tracing_disabled)
2320 		return;
2321 
2322 	/* If global, we need to also start the max tracer */
2323 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2324 		return tracing_start();
2325 
2326 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2327 
2328 	if (--tr->stop_count) {
2329 		if (tr->stop_count < 0) {
2330 			/* Someone screwed up their debugging */
2331 			WARN_ON_ONCE(1);
2332 			tr->stop_count = 0;
2333 		}
2334 		goto out;
2335 	}
2336 
2337 	buffer = tr->array_buffer.buffer;
2338 	if (buffer)
2339 		ring_buffer_record_enable(buffer);
2340 
2341  out:
2342 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2343 }
2344 
2345 /**
2346  * tracing_stop - quick stop of the tracer
2347  *
2348  * Light weight way to stop tracing. Use in conjunction with
2349  * tracing_start.
2350  */
2351 void tracing_stop(void)
2352 {
2353 	struct trace_buffer *buffer;
2354 	unsigned long flags;
2355 
2356 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2357 	if (global_trace.stop_count++)
2358 		goto out;
2359 
2360 	/* Prevent the buffers from switching */
2361 	arch_spin_lock(&global_trace.max_lock);
2362 
2363 	buffer = global_trace.array_buffer.buffer;
2364 	if (buffer)
2365 		ring_buffer_record_disable(buffer);
2366 
2367 #ifdef CONFIG_TRACER_MAX_TRACE
2368 	buffer = global_trace.max_buffer.buffer;
2369 	if (buffer)
2370 		ring_buffer_record_disable(buffer);
2371 #endif
2372 
2373 	arch_spin_unlock(&global_trace.max_lock);
2374 
2375  out:
2376 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2377 }
2378 
2379 static void tracing_stop_tr(struct trace_array *tr)
2380 {
2381 	struct trace_buffer *buffer;
2382 	unsigned long flags;
2383 
2384 	/* If global, we need to also stop the max tracer */
2385 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2386 		return tracing_stop();
2387 
2388 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2389 	if (tr->stop_count++)
2390 		goto out;
2391 
2392 	buffer = tr->array_buffer.buffer;
2393 	if (buffer)
2394 		ring_buffer_record_disable(buffer);
2395 
2396  out:
2397 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2398 }
2399 
2400 static int trace_save_cmdline(struct task_struct *tsk)
2401 {
2402 	unsigned tpid, idx;
2403 
2404 	/* treat recording of idle task as a success */
2405 	if (!tsk->pid)
2406 		return 1;
2407 
2408 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2409 
2410 	/*
2411 	 * It's not the end of the world if we don't get
2412 	 * the lock, but we also don't want to spin
2413 	 * nor do we want to disable interrupts,
2414 	 * so if we miss here, then better luck next time.
2415 	 */
2416 	if (!arch_spin_trylock(&trace_cmdline_lock))
2417 		return 0;
2418 
2419 	idx = savedcmd->map_pid_to_cmdline[tpid];
2420 	if (idx == NO_CMDLINE_MAP) {
2421 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2422 
2423 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2424 		savedcmd->cmdline_idx = idx;
2425 	}
2426 
2427 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2428 	set_cmdline(idx, tsk->comm);
2429 
2430 	arch_spin_unlock(&trace_cmdline_lock);
2431 
2432 	return 1;
2433 }
2434 
2435 static void __trace_find_cmdline(int pid, char comm[])
2436 {
2437 	unsigned map;
2438 	int tpid;
2439 
2440 	if (!pid) {
2441 		strcpy(comm, "<idle>");
2442 		return;
2443 	}
2444 
2445 	if (WARN_ON_ONCE(pid < 0)) {
2446 		strcpy(comm, "<XXX>");
2447 		return;
2448 	}
2449 
2450 	tpid = pid & (PID_MAX_DEFAULT - 1);
2451 	map = savedcmd->map_pid_to_cmdline[tpid];
2452 	if (map != NO_CMDLINE_MAP) {
2453 		tpid = savedcmd->map_cmdline_to_pid[map];
2454 		if (tpid == pid) {
2455 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2456 			return;
2457 		}
2458 	}
2459 	strcpy(comm, "<...>");
2460 }
2461 
2462 void trace_find_cmdline(int pid, char comm[])
2463 {
2464 	preempt_disable();
2465 	arch_spin_lock(&trace_cmdline_lock);
2466 
2467 	__trace_find_cmdline(pid, comm);
2468 
2469 	arch_spin_unlock(&trace_cmdline_lock);
2470 	preempt_enable();
2471 }
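
/*
 * Illustrative sketch (not part of the original file): output code resolves
 * a recorded pid back to a comm with trace_find_cmdline(), receiving "<...>"
 * when the cached entry has since been recycled.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d", comm, entry->pid);
 */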
2472 
2473 static int *trace_find_tgid_ptr(int pid)
2474 {
2475 	/*
2476 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2477 	 * if we observe a non-NULL tgid_map then we also observe the correct
2478 	 * tgid_map_max.
2479 	 */
2480 	int *map = smp_load_acquire(&tgid_map);
2481 
2482 	if (unlikely(!map || pid > tgid_map_max))
2483 		return NULL;
2484 
2485 	return &map[pid];
2486 }
2487 
2488 int trace_find_tgid(int pid)
2489 {
2490 	int *ptr = trace_find_tgid_ptr(pid);
2491 
2492 	return ptr ? *ptr : 0;
2493 }
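
/*
 * Illustrative sketch (not part of the original file) of the release side
 * that the smp_load_acquire() in trace_find_tgid_ptr() pairs with: the
 * writer sets tgid_map_max first and only then publishes the array.  The
 * real publishing site is set_tracer_flag() (not shown here); this is a
 * simplified model and example_pid_max is hypothetical.
 *
 *	tgid_map_max = example_pid_max;
 *	map = kvcalloc(tgid_map_max + 1, sizeof(*map), GFP_KERNEL);
 *	if (map)
 *		smp_store_release(&tgid_map, map);
 */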
2494 
2495 static int trace_save_tgid(struct task_struct *tsk)
2496 {
2497 	int *ptr;
2498 
2499 	/* treat recording of idle task as a success */
2500 	if (!tsk->pid)
2501 		return 1;
2502 
2503 	ptr = trace_find_tgid_ptr(tsk->pid);
2504 	if (!ptr)
2505 		return 0;
2506 
2507 	*ptr = tsk->tgid;
2508 	return 1;
2509 }
2510 
2511 static bool tracing_record_taskinfo_skip(int flags)
2512 {
2513 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2514 		return true;
2515 	if (!__this_cpu_read(trace_taskinfo_save))
2516 		return true;
2517 	return false;
2518 }
2519 
2520 /**
2521  * tracing_record_taskinfo - record the task info of a task
2522  *
2523  * @task:  task to record
2524  * @flags: TRACE_RECORD_CMDLINE for recording comm
2525  *         TRACE_RECORD_TGID for recording tgid
2526  */
2527 void tracing_record_taskinfo(struct task_struct *task, int flags)
2528 {
2529 	bool done;
2530 
2531 	if (tracing_record_taskinfo_skip(flags))
2532 		return;
2533 
2534 	/*
2535 	 * Record as much task information as possible. If some fail, continue
2536 	 * to try to record the others.
2537 	 */
2538 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2539 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2540 
2541 	/* If recording any information failed, retry again soon. */
2542 	if (!done)
2543 		return;
2544 
2545 	__this_cpu_write(trace_taskinfo_save, false);
2546 }
2547 
2548 /**
2549  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2550  *
2551  * @prev: previous task during sched_switch
2552  * @next: next task during sched_switch
2553  * @flags: TRACE_RECORD_CMDLINE for recording comm
2554  *         TRACE_RECORD_TGID for recording tgid
2555  */
2556 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2557 					  struct task_struct *next, int flags)
2558 {
2559 	bool done;
2560 
2561 	if (tracing_record_taskinfo_skip(flags))
2562 		return;
2563 
2564 	/*
2565 	 * Record as much task information as possible. If some fail, continue
2566 	 * to try to record the others.
2567 	 */
2568 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2569 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2570 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2571 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2572 
2573 	/* If recording any information failed, retry again soon. */
2574 	if (!done)
2575 		return;
2576 
2577 	__this_cpu_write(trace_taskinfo_save, false);
2578 }
2579 
2580 /* Helpers to record a specific task information */
2581 void tracing_record_cmdline(struct task_struct *task)
2582 {
2583 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2584 }
2585 
2586 void tracing_record_tgid(struct task_struct *task)
2587 {
2588 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2589 }
2590 
2591 /*
2592  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2593  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2594  * simplifies those functions and keeps them in sync.
2595  */
2596 enum print_line_t trace_handle_return(struct trace_seq *s)
2597 {
2598 	return trace_seq_has_overflowed(s) ?
2599 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2600 }
2601 EXPORT_SYMBOL_GPL(trace_handle_return);
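
/*
 * Illustrative sketch (not part of the original file): event output
 * callbacks end with trace_handle_return() so that an overflowing trace_seq
 * is reported as a partial line rather than silently truncated.  The
 * callback below is hypothetical.
 *
 *	static enum print_line_t example_trace_output(struct trace_iterator *iter,
 *						      int flags, struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example event\n");
 *		return trace_handle_return(s);
 *	}
 */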
2602 
2603 static unsigned short migration_disable_value(void)
2604 {
2605 #if defined(CONFIG_SMP)
2606 	return current->migration_disabled;
2607 #else
2608 	return 0;
2609 #endif
2610 }
2611 
2612 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2613 {
2614 	unsigned int trace_flags = irqs_status;
2615 	unsigned int pc;
2616 
2617 	pc = preempt_count();
2618 
2619 	if (pc & NMI_MASK)
2620 		trace_flags |= TRACE_FLAG_NMI;
2621 	if (pc & HARDIRQ_MASK)
2622 		trace_flags |= TRACE_FLAG_HARDIRQ;
2623 	if (in_serving_softirq())
2624 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2625 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2626 		trace_flags |= TRACE_FLAG_BH_OFF;
2627 
2628 	if (tif_need_resched())
2629 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630 	if (test_preempt_need_resched())
2631 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634 }
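
/*
 * Worked example (not part of the original file) of how the packed word
 * returned above is laid out and later unpacked by consumers:
 *
 *	bits  0- 3: preemption depth (clamped to 15)
 *	bits  4- 7: migration-disable depth (clamped to 15)
 *	bits 16-31: TRACE_FLAG_* bits
 *
 *	unsigned int preempt_depth = trace_ctx & 0xf;
 *	unsigned int migrate_depth = (trace_ctx >> 4) & 0xf;
 *	unsigned int flags	   = trace_ctx >> 16;
 */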
2635 
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638 			  int type,
2639 			  unsigned long len,
2640 			  unsigned int trace_ctx)
2641 {
2642 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643 }
2644 
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648 
2649 /**
2650  * trace_buffered_event_enable - enable buffering events
2651  *
2652  * When events are being filtered, it is quicker to use a temporary
2653  * buffer to write the event data into if there's a likely chance
2654  * that it will not be committed. The discard of the ring buffer
2655  * is not as fast as committing, and is much slower than copying
2656  * a commit.
2657  *
2658  * When an event is to be filtered, allocate per cpu buffers to
2659  * write the event data into, and if the event is filtered and discarded
2660  * it is simply dropped, otherwise, the entire data is to be committed
2661  * in one shot.
2662  */
2663 void trace_buffered_event_enable(void)
2664 {
2665 	struct ring_buffer_event *event;
2666 	struct page *page;
2667 	int cpu;
2668 
2669 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670 
2671 	if (trace_buffered_event_ref++)
2672 		return;
2673 
2674 	for_each_tracing_cpu(cpu) {
2675 		page = alloc_pages_node(cpu_to_node(cpu),
2676 					GFP_KERNEL | __GFP_NORETRY, 0);
2677 		if (!page)
2678 			goto failed;
2679 
2680 		event = page_address(page);
2681 		memset(event, 0, sizeof(*event));
2682 
2683 		per_cpu(trace_buffered_event, cpu) = event;
2684 
2685 		preempt_disable();
2686 		if (cpu == smp_processor_id() &&
2687 		    __this_cpu_read(trace_buffered_event) !=
2688 		    per_cpu(trace_buffered_event, cpu))
2689 			WARN_ON_ONCE(1);
2690 		preempt_enable();
2691 	}
2692 
2693 	return;
2694  failed:
2695 	trace_buffered_event_disable();
2696 }
2697 
2698 static void enable_trace_buffered_event(void *data)
2699 {
2700 	/* Probably not needed, but do it anyway */
2701 	smp_rmb();
2702 	this_cpu_dec(trace_buffered_event_cnt);
2703 }
2704 
2705 static void disable_trace_buffered_event(void *data)
2706 {
2707 	this_cpu_inc(trace_buffered_event_cnt);
2708 }
2709 
2710 /**
2711  * trace_buffered_event_disable - disable buffering events
2712  *
2713  * When a filter is removed, it is faster to not use the buffered
2714  * events, and to commit directly into the ring buffer. Free up
2715  * the temp buffers when there are no more users. This requires
2716  * special synchronization with current events.
2717  */
2718 void trace_buffered_event_disable(void)
2719 {
2720 	int cpu;
2721 
2722 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723 
2724 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725 		return;
2726 
2727 	if (--trace_buffered_event_ref)
2728 		return;
2729 
2730 	preempt_disable();
2731 	/* For each CPU, set the buffer as used. */
2732 	smp_call_function_many(tracing_buffer_mask,
2733 			       disable_trace_buffered_event, NULL, 1);
2734 	preempt_enable();
2735 
2736 	/* Wait for all current users to finish */
2737 	synchronize_rcu();
2738 
2739 	for_each_tracing_cpu(cpu) {
2740 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741 		per_cpu(trace_buffered_event, cpu) = NULL;
2742 	}
2743 	/*
2744 	 * Make sure trace_buffered_event is NULL before clearing
2745 	 * trace_buffered_event_cnt.
2746 	 */
2747 	smp_wmb();
2748 
2749 	preempt_disable();
2750 	/* Do the work on each cpu */
2751 	smp_call_function_many(tracing_buffer_mask,
2752 			       enable_trace_buffered_event, NULL, 1);
2753 	preempt_enable();
2754 }
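
/*
 * Illustrative sketch (not part of the original file): the enable/disable
 * pair above is reference counted and both calls must be made with
 * event_mutex held.  A hypothetical caller turning buffering on and later
 * off again would look like:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	// ... events are filtered through the per-CPU buffers ...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */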
2755 
2756 static struct trace_buffer *temp_buffer;
2757 
2758 struct ring_buffer_event *
2759 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760 			  struct trace_event_file *trace_file,
2761 			  int type, unsigned long len,
2762 			  unsigned int trace_ctx)
2763 {
2764 	struct ring_buffer_event *entry;
2765 	struct trace_array *tr = trace_file->tr;
2766 	int val;
2767 
2768 	*current_rb = tr->array_buffer.buffer;
2769 
2770 	if (!tr->no_filter_buffering_ref &&
2771 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2772 		preempt_disable_notrace();
2773 		/*
2774 		 * Filtering is on, so try to use the per cpu buffer first.
2775 		 * This buffer will simulate a ring_buffer_event,
2776 		 * where the type_len is zero and the array[0] will
2777 		 * hold the full length.
2778 		 * (see include/linux/ring_buffer.h for details on
2779 		 *  how the ring_buffer_event is structured).
2780 		 *
2781 		 * Using a temp buffer during filtering and copying it
2782 		 * on a matched filter is quicker than writing directly
2783 		 * into the ring buffer and then discarding it when
2784 		 * it doesn't match. That is because the discard
2785 		 * requires several atomic operations to get right.
2786 		 * Copying on a match and doing nothing on a failed match
2787 		 * is still quicker than writing directly and having to
2788 		 * discard out of the ring buffer on a failed match.
2789 		 */
2790 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2791 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2792 
2793 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2794 
2795 			/*
2796 			 * Preemption is disabled, but interrupts and NMIs
2797 			 * can still come in now. If that happens after
2798 			 * the above increment, then it will have to go
2799 			 * back to the old method of allocating the event
2800 			 * on the ring buffer, and if the filter fails, it
2801 			 * will have to call ring_buffer_discard_commit()
2802 			 * to remove it.
2803 			 *
2804 			 * Need to also check the unlikely case that the
2805 			 * length is bigger than the temp buffer size.
2806 			 * If that happens, then the reserve is pretty much
2807 			 * guaranteed to fail, as the ring buffer currently
2808 			 * only allows events less than a page. But that may
2809 			 * change in the future, so let the ring buffer reserve
2810 			 * handle the failure in that case.
2811 			 */
2812 			if (val == 1 && likely(len <= max_len)) {
2813 				trace_event_setup(entry, type, trace_ctx);
2814 				entry->array[0] = len;
2815 				/* Return with preemption disabled */
2816 				return entry;
2817 			}
2818 			this_cpu_dec(trace_buffered_event_cnt);
2819 		}
2820 		/* __trace_buffer_lock_reserve() disables preemption */
2821 		preempt_enable_notrace();
2822 	}
2823 
2824 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825 					    trace_ctx);
2826 	/*
2827 	 * If tracing is off, but we have triggers enabled,
2828 	 * we still need to look at the event data. Use the temp_buffer
2829 	 * to store the trace event for the trigger to use. It's recursion
2830 	 * safe and will not be recorded anywhere.
2831 	 */
2832 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2833 		*current_rb = temp_buffer;
2834 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2835 						    trace_ctx);
2836 	}
2837 	return entry;
2838 }
2839 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2840 
2841 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2842 static DEFINE_MUTEX(tracepoint_printk_mutex);
2843 
2844 static void output_printk(struct trace_event_buffer *fbuffer)
2845 {
2846 	struct trace_event_call *event_call;
2847 	struct trace_event_file *file;
2848 	struct trace_event *event;
2849 	unsigned long flags;
2850 	struct trace_iterator *iter = tracepoint_print_iter;
2851 
2852 	/* We should never get here if iter is NULL */
2853 	if (WARN_ON_ONCE(!iter))
2854 		return;
2855 
2856 	event_call = fbuffer->trace_file->event_call;
2857 	if (!event_call || !event_call->event.funcs ||
2858 	    !event_call->event.funcs->trace)
2859 		return;
2860 
2861 	file = fbuffer->trace_file;
2862 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2863 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2864 	     !filter_match_preds(file->filter, fbuffer->entry)))
2865 		return;
2866 
2867 	event = &fbuffer->trace_file->event_call->event;
2868 
2869 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2870 	trace_seq_init(&iter->seq);
2871 	iter->ent = fbuffer->entry;
2872 	event_call->event.funcs->trace(iter, 0, event);
2873 	trace_seq_putc(&iter->seq, 0);
2874 	printk("%s", iter->seq.buffer);
2875 
2876 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2877 }
2878 
2879 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2880 			     void *buffer, size_t *lenp,
2881 			     loff_t *ppos)
2882 {
2883 	int save_tracepoint_printk;
2884 	int ret;
2885 
2886 	mutex_lock(&tracepoint_printk_mutex);
2887 	save_tracepoint_printk = tracepoint_printk;
2888 
2889 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2890 
2891 	/*
2892 	 * This will force exiting early, as tracepoint_printk
2893 	 * is always zero when tracepoint_print_iter is not allocated.
2894 	 */
2895 	if (!tracepoint_print_iter)
2896 		tracepoint_printk = 0;
2897 
2898 	if (save_tracepoint_printk == tracepoint_printk)
2899 		goto out;
2900 
2901 	if (tracepoint_printk)
2902 		static_key_enable(&tracepoint_printk_key.key);
2903 	else
2904 		static_key_disable(&tracepoint_printk_key.key);
2905 
2906  out:
2907 	mutex_unlock(&tracepoint_printk_mutex);
2908 
2909 	return ret;
2910 }
2911 
2912 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2913 {
2914 	enum event_trigger_type tt = ETT_NONE;
2915 	struct trace_event_file *file = fbuffer->trace_file;
2916 
2917 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2918 			fbuffer->entry, &tt))
2919 		goto discard;
2920 
2921 	if (static_key_false(&tracepoint_printk_key.key))
2922 		output_printk(fbuffer);
2923 
2924 	if (static_branch_unlikely(&trace_event_exports_enabled))
2925 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2926 
2927 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2928 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2929 
2930 discard:
2931 	if (tt)
2932 		event_triggers_post_call(file, tt);
2933 
2934 }
2935 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2936 
2937 /*
2938  * Skip 3:
2939  *
2940  *   trace_buffer_unlock_commit_regs()
2941  *   trace_event_buffer_commit()
2942  *   trace_event_raw_event_xxx()
2943  */
2944 # define STACK_SKIP 3
2945 
2946 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2947 				     struct trace_buffer *buffer,
2948 				     struct ring_buffer_event *event,
2949 				     unsigned int trace_ctx,
2950 				     struct pt_regs *regs)
2951 {
2952 	__buffer_unlock_commit(buffer, event);
2953 
2954 	/*
2955 	 * If regs is not set, then skip the necessary functions.
2956 	 * Note, we can still get here via blktrace, wakeup tracer
2957 	 * and mmiotrace, but that's ok if they lose a function or
2958 	 * two. They are not that meaningful.
2959 	 */
2960 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2961 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2962 }
2963 
2964 /*
2965  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2966  */
2967 void
2968 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2969 				   struct ring_buffer_event *event)
2970 {
2971 	__buffer_unlock_commit(buffer, event);
2972 }
2973 
2974 void
2975 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2976 	       parent_ip, unsigned int trace_ctx)
2977 {
2978 	struct trace_event_call *call = &event_function;
2979 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2980 	struct ring_buffer_event *event;
2981 	struct ftrace_entry *entry;
2982 
2983 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2984 					    trace_ctx);
2985 	if (!event)
2986 		return;
2987 	entry	= ring_buffer_event_data(event);
2988 	entry->ip			= ip;
2989 	entry->parent_ip		= parent_ip;
2990 
2991 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2992 		if (static_branch_unlikely(&trace_function_exports_enabled))
2993 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2994 		__buffer_unlock_commit(buffer, event);
2995 	}
2996 }
2997 
2998 #ifdef CONFIG_STACKTRACE
2999 
3000 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3001 #define FTRACE_KSTACK_NESTING	4
3002 
3003 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3004 
3005 struct ftrace_stack {
3006 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3007 };
3008 
3009 
3010 struct ftrace_stacks {
3011 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3012 };
3013 
3014 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3015 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3016 
3017 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3018 				 unsigned int trace_ctx,
3019 				 int skip, struct pt_regs *regs)
3020 {
3021 	struct trace_event_call *call = &event_kernel_stack;
3022 	struct ring_buffer_event *event;
3023 	unsigned int size, nr_entries;
3024 	struct ftrace_stack *fstack;
3025 	struct stack_entry *entry;
3026 	int stackidx;
3027 
3028 	/*
3029 	 * Add one, for this function and the call to stack_trace_save().
3030 	 * If regs is set, then these functions will not be in the way.
3031 	 */
3032 #ifndef CONFIG_UNWINDER_ORC
3033 	if (!regs)
3034 		skip++;
3035 #endif
3036 
3037 	preempt_disable_notrace();
3038 
3039 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3040 
3041 	/* This should never happen. If it does, yell once and skip */
3042 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3043 		goto out;
3044 
3045 	/*
3046 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3047 	 * interrupt will either see the value pre increment or post
3048 	 * increment. If the interrupt happens pre increment it will have
3049 	 * restored the counter when it returns.  We just need a barrier to
3050 	 * keep gcc from moving things around.
3051 	 */
3052 	barrier();
3053 
3054 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3055 	size = ARRAY_SIZE(fstack->calls);
3056 
3057 	if (regs) {
3058 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3059 						   size, skip);
3060 	} else {
3061 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3062 	}
3063 
3064 	size = nr_entries * sizeof(unsigned long);
3065 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3066 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3067 				    trace_ctx);
3068 	if (!event)
3069 		goto out;
3070 	entry = ring_buffer_event_data(event);
3071 
3072 	memcpy(&entry->caller, fstack->calls, size);
3073 	entry->size = nr_entries;
3074 
3075 	if (!call_filter_check_discard(call, entry, buffer, event))
3076 		__buffer_unlock_commit(buffer, event);
3077 
3078  out:
3079 	/* Again, don't let gcc optimize things here */
3080 	barrier();
3081 	__this_cpu_dec(ftrace_stack_reserve);
3082 	preempt_enable_notrace();
3083 
3084 }
3085 
3086 static inline void ftrace_trace_stack(struct trace_array *tr,
3087 				      struct trace_buffer *buffer,
3088 				      unsigned int trace_ctx,
3089 				      int skip, struct pt_regs *regs)
3090 {
3091 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3092 		return;
3093 
3094 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3095 }
3096 
3097 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3098 		   int skip)
3099 {
3100 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3101 
3102 	if (rcu_is_watching()) {
3103 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104 		return;
3105 	}
3106 
3107 	/*
3108 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3109 	 * but if the above rcu_is_watching() failed, then the NMI
3110 	 * triggered someplace critical, and rcu_irq_enter() should
3111 	 * not be called from NMI.
3112 	 */
3113 	if (unlikely(in_nmi()))
3114 		return;
3115 
3116 	rcu_irq_enter_irqson();
3117 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3118 	rcu_irq_exit_irqson();
3119 }
3120 
3121 /**
3122  * trace_dump_stack - record a stack back trace in the trace buffer
3123  * @skip: Number of functions to skip (helper handlers)
3124  */
3125 void trace_dump_stack(int skip)
3126 {
3127 	if (tracing_disabled || tracing_selftest_running)
3128 		return;
3129 
3130 #ifndef CONFIG_UNWINDER_ORC
3131 	/* Skip 1 to skip this function. */
3132 	skip++;
3133 #endif
3134 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3135 			     tracing_gen_ctx(), skip, NULL);
3136 }
3137 EXPORT_SYMBOL_GPL(trace_dump_stack);
3138 
3139 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3140 static DEFINE_PER_CPU(int, user_stack_count);
3141 
3142 static void
3143 ftrace_trace_userstack(struct trace_array *tr,
3144 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3145 {
3146 	struct trace_event_call *call = &event_user_stack;
3147 	struct ring_buffer_event *event;
3148 	struct userstack_entry *entry;
3149 
3150 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3151 		return;
3152 
3153 	/*
3154 	 * NMIs can not handle page faults, even with fixups.
3155 	 * Saving the user stack can (and often does) fault.
3156 	 */
3157 	if (unlikely(in_nmi()))
3158 		return;
3159 
3160 	/*
3161 	 * prevent recursion, since the user stack tracing may
3162 	 * trigger other kernel events.
3163 	 */
3164 	preempt_disable();
3165 	if (__this_cpu_read(user_stack_count))
3166 		goto out;
3167 
3168 	__this_cpu_inc(user_stack_count);
3169 
3170 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3171 					    sizeof(*entry), trace_ctx);
3172 	if (!event)
3173 		goto out_drop_count;
3174 	entry	= ring_buffer_event_data(event);
3175 
3176 	entry->tgid		= current->tgid;
3177 	memset(&entry->caller, 0, sizeof(entry->caller));
3178 
3179 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3180 	if (!call_filter_check_discard(call, entry, buffer, event))
3181 		__buffer_unlock_commit(buffer, event);
3182 
3183  out_drop_count:
3184 	__this_cpu_dec(user_stack_count);
3185  out:
3186 	preempt_enable();
3187 }
3188 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3189 static void ftrace_trace_userstack(struct trace_array *tr,
3190 				   struct trace_buffer *buffer,
3191 				   unsigned int trace_ctx)
3192 {
3193 }
3194 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3195 
3196 #endif /* CONFIG_STACKTRACE */
3197 
3198 static inline void
3199 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3200 			  unsigned long long delta)
3201 {
3202 	entry->bottom_delta_ts = delta & U32_MAX;
3203 	entry->top_delta_ts = (delta >> 32);
3204 }
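
/*
 * Worked example (not part of the original file): the split stored above is
 * undone on the read side as
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 *
 * which is effectively what the FUNC_REPEATS output code does.
 */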
3205 
3206 void trace_last_func_repeats(struct trace_array *tr,
3207 			     struct trace_func_repeats *last_info,
3208 			     unsigned int trace_ctx)
3209 {
3210 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3211 	struct func_repeats_entry *entry;
3212 	struct ring_buffer_event *event;
3213 	u64 delta;
3214 
3215 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3216 					    sizeof(*entry), trace_ctx);
3217 	if (!event)
3218 		return;
3219 
3220 	delta = ring_buffer_event_time_stamp(buffer, event) -
3221 		last_info->ts_last_call;
3222 
3223 	entry = ring_buffer_event_data(event);
3224 	entry->ip = last_info->ip;
3225 	entry->parent_ip = last_info->parent_ip;
3226 	entry->count = last_info->count;
3227 	func_repeats_set_delta_ts(entry, delta);
3228 
3229 	__buffer_unlock_commit(buffer, event);
3230 }
3231 
3232 /* created for use with alloc_percpu */
3233 struct trace_buffer_struct {
3234 	int nesting;
3235 	char buffer[4][TRACE_BUF_SIZE];
3236 };
3237 
3238 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3239 
3240 /*
3241  * This allows for lockless recording.  If we're nested too deeply, then
3242  * this returns NULL.
3243  */
3244 static char *get_trace_buf(void)
3245 {
3246 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3247 
3248 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3249 		return NULL;
3250 
3251 	buffer->nesting++;
3252 
3253 	/* Interrupts must see nesting incremented before we use the buffer */
3254 	barrier();
3255 	return &buffer->buffer[buffer->nesting - 1][0];
3256 }
3257 
3258 static void put_trace_buf(void)
3259 {
3260 	/* Don't let the decrement of nesting leak before this */
3261 	barrier();
3262 	this_cpu_dec(trace_percpu_buffer->nesting);
3263 }
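
/*
 * Illustrative sketch (not part of the original file) of the intended
 * pairing: take the per-CPU, nesting-aware buffer with preemption disabled,
 * use it, and release it on the same CPU (trace_vbprintk() below is a real
 * caller of this pattern).
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format up to TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */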
3264 
3265 static int alloc_percpu_trace_buffer(void)
3266 {
3267 	struct trace_buffer_struct __percpu *buffers;
3268 
3269 	if (trace_percpu_buffer)
3270 		return 0;
3271 
3272 	buffers = alloc_percpu(struct trace_buffer_struct);
3273 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3274 		return -ENOMEM;
3275 
3276 	trace_percpu_buffer = buffers;
3277 	return 0;
3278 }
3279 
3280 static int buffers_allocated;
3281 
3282 void trace_printk_init_buffers(void)
3283 {
3284 	if (buffers_allocated)
3285 		return;
3286 
3287 	if (alloc_percpu_trace_buffer())
3288 		return;
3289 
3290 	/* trace_printk() is for debug use only. Don't use it in production. */
3291 
3292 	pr_warn("\n");
3293 	pr_warn("**********************************************************\n");
3294 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3295 	pr_warn("**                                                      **\n");
3296 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3297 	pr_warn("**                                                      **\n");
3298 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3299 	pr_warn("** unsafe for production use.                           **\n");
3300 	pr_warn("**                                                      **\n");
3301 	pr_warn("** If you see this message and you are not debugging    **\n");
3302 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3303 	pr_warn("**                                                      **\n");
3304 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3305 	pr_warn("**********************************************************\n");
3306 
3307 	/* Expand the buffers to set size */
3308 	tracing_update_buffers();
3309 
3310 	buffers_allocated = 1;
3311 
3312 	/*
3313 	 * trace_printk_init_buffers() can be called by modules.
3314 	 * If that happens, then we need to start cmdline recording
3315 	 * directly here. If the global_trace.buffer is already
3316 	 * allocated here, then this was called by module code.
3317 	 */
3318 	if (global_trace.array_buffer.buffer)
3319 		tracing_start_cmdline_record();
3320 }
3321 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3322 
3323 void trace_printk_start_comm(void)
3324 {
3325 	/* Start tracing comms if trace printk is set */
3326 	if (!buffers_allocated)
3327 		return;
3328 	tracing_start_cmdline_record();
3329 }
3330 
3331 static void trace_printk_start_stop_comm(int enabled)
3332 {
3333 	if (!buffers_allocated)
3334 		return;
3335 
3336 	if (enabled)
3337 		tracing_start_cmdline_record();
3338 	else
3339 		tracing_stop_cmdline_record();
3340 }
3341 
3342 /**
3343  * trace_vbprintk - write binary msg to tracing buffer
3344  * @ip:    The address of the caller
3345  * @fmt:   The string format to write to the buffer
3346  * @args:  Arguments for @fmt
3347  */
3348 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3349 {
3350 	struct trace_event_call *call = &event_bprint;
3351 	struct ring_buffer_event *event;
3352 	struct trace_buffer *buffer;
3353 	struct trace_array *tr = &global_trace;
3354 	struct bprint_entry *entry;
3355 	unsigned int trace_ctx;
3356 	char *tbuffer;
3357 	int len = 0, size;
3358 
3359 	if (unlikely(tracing_selftest_running || tracing_disabled))
3360 		return 0;
3361 
3362 	/* Don't pollute graph traces with trace_vprintk internals */
3363 	pause_graph_tracing();
3364 
3365 	trace_ctx = tracing_gen_ctx();
3366 	preempt_disable_notrace();
3367 
3368 	tbuffer = get_trace_buf();
3369 	if (!tbuffer) {
3370 		len = 0;
3371 		goto out_nobuffer;
3372 	}
3373 
3374 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375 
3376 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377 		goto out_put;
3378 
3379 	size = sizeof(*entry) + sizeof(u32) * len;
3380 	buffer = tr->array_buffer.buffer;
3381 	ring_buffer_nest_start(buffer);
3382 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383 					    trace_ctx);
3384 	if (!event)
3385 		goto out;
3386 	entry = ring_buffer_event_data(event);
3387 	entry->ip			= ip;
3388 	entry->fmt			= fmt;
3389 
3390 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3392 		__buffer_unlock_commit(buffer, event);
3393 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3394 	}
3395 
3396 out:
3397 	ring_buffer_nest_end(buffer);
3398 out_put:
3399 	put_trace_buf();
3400 
3401 out_nobuffer:
3402 	preempt_enable_notrace();
3403 	unpause_graph_tracing();
3404 
3405 	return len;
3406 }
3407 EXPORT_SYMBOL_GPL(trace_vbprintk);
3408 
3409 __printf(3, 0)
3410 static int
3411 __trace_array_vprintk(struct trace_buffer *buffer,
3412 		      unsigned long ip, const char *fmt, va_list args)
3413 {
3414 	struct trace_event_call *call = &event_print;
3415 	struct ring_buffer_event *event;
3416 	int len = 0, size;
3417 	struct print_entry *entry;
3418 	unsigned int trace_ctx;
3419 	char *tbuffer;
3420 
3421 	if (tracing_disabled || tracing_selftest_running)
3422 		return 0;
3423 
3424 	/* Don't pollute graph traces with trace_vprintk internals */
3425 	pause_graph_tracing();
3426 
3427 	trace_ctx = tracing_gen_ctx();
3428 	preempt_disable_notrace();
3429 
3430 
3431 	tbuffer = get_trace_buf();
3432 	if (!tbuffer) {
3433 		len = 0;
3434 		goto out_nobuffer;
3435 	}
3436 
3437 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3438 
3439 	size = sizeof(*entry) + len + 1;
3440 	ring_buffer_nest_start(buffer);
3441 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3442 					    trace_ctx);
3443 	if (!event)
3444 		goto out;
3445 	entry = ring_buffer_event_data(event);
3446 	entry->ip = ip;
3447 
3448 	memcpy(&entry->buf, tbuffer, len + 1);
3449 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3450 		__buffer_unlock_commit(buffer, event);
3451 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3452 	}
3453 
3454 out:
3455 	ring_buffer_nest_end(buffer);
3456 	put_trace_buf();
3457 
3458 out_nobuffer:
3459 	preempt_enable_notrace();
3460 	unpause_graph_tracing();
3461 
3462 	return len;
3463 }
3464 
3465 __printf(3, 0)
3466 int trace_array_vprintk(struct trace_array *tr,
3467 			unsigned long ip, const char *fmt, va_list args)
3468 {
3469 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3470 }
3471 
3472 /**
3473  * trace_array_printk - Print a message to a specific instance
3474  * @tr: The instance trace_array descriptor
3475  * @ip: The instruction pointer that this is called from.
3476  * @fmt: The format to print (printf format)
3477  *
3478  * If a subsystem sets up its own instance, it has the right to
3479  * printk strings into its tracing instance buffer using this
3480  * function. Note, this function will not write into the top level
3481  * buffer (use trace_printk() for that), as writing into the top level
3482  * buffer should only have events that can be individually disabled.
3483  * trace_printk() is only used for debugging a kernel, and should not
3484  * be ever incorporated in normal use.
3485  *
3486  * trace_array_printk() can be used, as it will not add noise to the
3487  * top level tracing buffer.
3488  *
3489  * Note, trace_array_init_printk() must be called on @tr before this
3490  * can be used.
3491  */
3492 __printf(3, 0)
3493 int trace_array_printk(struct trace_array *tr,
3494 		       unsigned long ip, const char *fmt, ...)
3495 {
3496 	int ret;
3497 	va_list ap;
3498 
3499 	if (!tr)
3500 		return -ENOENT;
3501 
3502 	/* This is only allowed for created instances */
3503 	if (tr == &global_trace)
3504 		return 0;
3505 
3506 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3507 		return 0;
3508 
3509 	va_start(ap, fmt);
3510 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3511 	va_end(ap);
3512 	return ret;
3513 }
3514 EXPORT_SYMBOL_GPL(trace_array_printk);
3515 
3516 /**
3517  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3518  * @tr: The trace array to initialize the buffers for
3519  *
3520  * As trace_array_printk() only writes into instances, they are OK to
3521  * have in the kernel (unlike trace_printk()). This needs to be called
3522  * before trace_array_printk() can be used on a trace_array.
3523  */
3524 int trace_array_init_printk(struct trace_array *tr)
3525 {
3526 	if (!tr)
3527 		return -ENOENT;
3528 
3529 	/* This is only allowed for created instances */
3530 	if (tr == &global_trace)
3531 		return -EINVAL;
3532 
3533 	return alloc_percpu_trace_buffer();
3534 }
3535 EXPORT_SYMBOL_GPL(trace_array_init_printk);
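
/*
 * Illustrative sketch (not taken from an in-tree user): a subsystem that
 * created its own instance could pair the two calls above roughly like
 * this, where "my_instance" and "count" are placeholder names:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "added %d widgets\n", count);
 */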
3536 
3537 __printf(3, 4)
3538 int trace_array_printk_buf(struct trace_buffer *buffer,
3539 			   unsigned long ip, const char *fmt, ...)
3540 {
3541 	int ret;
3542 	va_list ap;
3543 
3544 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3545 		return 0;
3546 
3547 	va_start(ap, fmt);
3548 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3549 	va_end(ap);
3550 	return ret;
3551 }
3552 
3553 __printf(2, 0)
3554 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3555 {
3556 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3557 }
3558 EXPORT_SYMBOL_GPL(trace_vprintk);
3559 
3560 static void trace_iterator_increment(struct trace_iterator *iter)
3561 {
3562 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3563 
3564 	iter->idx++;
3565 	if (buf_iter)
3566 		ring_buffer_iter_advance(buf_iter);
3567 }
3568 
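/*
 * Peek at the next entry on @cpu without consuming it. When the iterator
 * has a dedicated ring buffer iterator for this CPU it is used (lost
 * events are reported as (unsigned long)-1 if anything was dropped),
 * otherwise fall back to ring_buffer_peek(). The entry size is saved in
 * iter->ent_size.
 */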
3569 static struct trace_entry *
3570 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3571 		unsigned long *lost_events)
3572 {
3573 	struct ring_buffer_event *event;
3574 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3575 
3576 	if (buf_iter) {
3577 		event = ring_buffer_iter_peek(buf_iter, ts);
3578 		if (lost_events)
3579 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3580 				(unsigned long)-1 : 0;
3581 	} else {
3582 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3583 					 lost_events);
3584 	}
3585 
3586 	if (event) {
3587 		iter->ent_size = ring_buffer_event_length(event);
3588 		return ring_buffer_event_data(event);
3589 	}
3590 	iter->ent_size = 0;
3591 	return NULL;
3592 }
3593 
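/*
 * Scan the tracing CPUs (or just iter->cpu_file when reading a per_cpu
 * file) and return the entry with the earliest timestamp, along with the
 * CPU it came from, its timestamp and any lost-event count.
 */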
3594 static struct trace_entry *
3595 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3596 		  unsigned long *missing_events, u64 *ent_ts)
3597 {
3598 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3599 	struct trace_entry *ent, *next = NULL;
3600 	unsigned long lost_events = 0, next_lost = 0;
3601 	int cpu_file = iter->cpu_file;
3602 	u64 next_ts = 0, ts;
3603 	int next_cpu = -1;
3604 	int next_size = 0;
3605 	int cpu;
3606 
3607 	/*
3608 	 * If we are in a per_cpu trace file, don't bother iterating over
3609 	 * all CPUs; just peek directly at that one.
3610 	 */
3611 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3612 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3613 			return NULL;
3614 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3615 		if (ent_cpu)
3616 			*ent_cpu = cpu_file;
3617 
3618 		return ent;
3619 	}
3620 
3621 	for_each_tracing_cpu(cpu) {
3622 
3623 		if (ring_buffer_empty_cpu(buffer, cpu))
3624 			continue;
3625 
3626 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3627 
3628 		/*
3629 		 * Pick the entry with the smallest timestamp:
3630 		 */
3631 		if (ent && (!next || ts < next_ts)) {
3632 			next = ent;
3633 			next_cpu = cpu;
3634 			next_ts = ts;
3635 			next_lost = lost_events;
3636 			next_size = iter->ent_size;
3637 		}
3638 	}
3639 
3640 	iter->ent_size = next_size;
3641 
3642 	if (ent_cpu)
3643 		*ent_cpu = next_cpu;
3644 
3645 	if (ent_ts)
3646 		*ent_ts = next_ts;
3647 
3648 	if (missing_events)
3649 		*missing_events = next_lost;
3650 
3651 	return next;
3652 }
3653 
3654 #define STATIC_FMT_BUF_SIZE	128
3655 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3656 
3657 static char *trace_iter_expand_format(struct trace_iterator *iter)
3658 {
3659 	char *tmp;
3660 
3661 	/*
3662 	 * iter->tr is NULL when used with tp_printk, which can cause this
3663 	 * to be called where it is not safe to call krealloc().
3664 	 */
3665 	if (!iter->tr || iter->fmt == static_fmt_buf)
3666 		return NULL;
3667 
3668 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3669 		       GFP_KERNEL);
3670 	if (tmp) {
3671 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3672 		iter->fmt = tmp;
3673 	}
3674 
3675 	return tmp;
3676 }
3677 
3678 /* Returns true if the string is safe to dereference from an event */
3679 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3680 			   bool star, int len)
3681 {
3682 	unsigned long addr = (unsigned long)str;
3683 	struct trace_event *trace_event;
3684 	struct trace_event_call *event;
3685 
3686 	/* Ignore strings with no length */
3687 	if (star && !len)
3688 		return true;
3689 
3690 	/* OK if part of the event data */
3691 	if ((addr >= (unsigned long)iter->ent) &&
3692 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3693 		return true;
3694 
3695 	/* OK if part of the temp seq buffer */
3696 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3697 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3698 		return true;
3699 
3700 	/* Core rodata cannot be freed */
3701 	if (is_kernel_rodata(addr))
3702 		return true;
3703 
3704 	if (trace_is_tracepoint_string(str))
3705 		return true;
3706 
3707 	/*
3708 	 * Now this could be a module event, referencing core module
3709 	 * data, which is OK.
3710 	 */
3711 	if (!iter->ent)
3712 		return false;
3713 
3714 	trace_event = ftrace_find_event(iter->ent->type);
3715 	if (!trace_event)
3716 		return false;
3717 
3718 	event = container_of(trace_event, struct trace_event_call, event);
3719 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3720 		return false;
3721 
3722 	/* Would rather have rodata, but this will suffice */
3723 	if (within_module_core(addr, event->module))
3724 		return true;
3725 
3726 	return false;
3727 }
3728 
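/* NUL-terminate the seq buffer and return it; used for the WARN output below */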
3729 static const char *show_buffer(struct trace_seq *s)
3730 {
3731 	struct seq_buf *seq = &s->seq;
3732 
3733 	seq_buf_terminate(seq);
3734 
3735 	return seq->buffer;
3736 }
3737 
3738 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3739 
3740 static int test_can_verify_check(const char *fmt, ...)
3741 {
3742 	char buf[16];
3743 	va_list ap;
3744 	int ret;
3745 
3746 	/*
3747 	 * The verifier depends on vsnprintf() modifying the va_list that is
3748 	 * passed to it, i.e. on the va_list being passed by reference. Some
3749 	 * architectures (like x86_32) pass it by value, which means that
3750 	 * vsnprintf() does not modify the caller's va_list, and the verifier
3751 	 * would then need to understand every conversion that vsnprintf()
3752 	 * can perform. If the va_list is passed by value, the verifier is
3753 	 * disabled.
3754 	 */
3755 	va_start(ap, fmt);
3756 	vsnprintf(buf, 16, "%d", ap);
3757 	ret = va_arg(ap, int);
3758 	va_end(ap);
3759 
3760 	return ret;
3761 }
3762 
3763 static void test_can_verify(void)
3764 {
3765 	if (!test_can_verify_check("%d %d", 0, 1)) {
3766 		pr_info("trace event string verifier disabled\n");
3767 		static_branch_inc(&trace_no_verify);
3768 	}
3769 }
3770 
3771 /**
3772  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3773  * @iter: The iterator that holds the seq buffer and the event being printed
3774  * @fmt: The format used to print the event
3775  * @ap: The va_list holding the data to print from @fmt.
3776  *
3777  * This writes the data into the @iter->seq buffer using the data from
3778  * @fmt and @ap. If the format has a %s, then the source of the string
3779  * is examined to make sure it is safe to print, otherwise it will
3780  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3781  * pointer.
3782  */
3783 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3784 			 va_list ap)
3785 {
3786 	const char *p = fmt;
3787 	const char *str;
3788 	int i, j;
3789 
3790 	if (WARN_ON_ONCE(!fmt))
3791 		return;
3792 
3793 	if (static_branch_unlikely(&trace_no_verify))
3794 		goto print;
3795 
3796 	/* Don't bother checking when doing a ftrace_dump() */
3797 	if (iter->fmt == static_fmt_buf)
3798 		goto print;
3799 
3800 	while (*p) {
3801 		bool star = false;
3802 		int len = 0;
3803 
3804 		j = 0;
3805 
3806 		/* We only care about %s and variants */
3807 		for (i = 0; p[i]; i++) {
3808 			if (i + 1 >= iter->fmt_size) {
3809 				/*
3810 				 * If we can't expand the copy buffer,
3811 				 * just print it.
3812 				 */
3813 				if (!trace_iter_expand_format(iter))
3814 					goto print;
3815 			}
3816 
3817 			if (p[i] == '\\' && p[i+1]) {
3818 				i++;
3819 				continue;
3820 			}
3821 			if (p[i] == '%') {
3822 				/* Need to test cases like %08.*s */
3823 				for (j = 1; p[i+j]; j++) {
3824 					if (isdigit(p[i+j]) ||
3825 					    p[i+j] == '.')
3826 						continue;
3827 					if (p[i+j] == '*') {
3828 						star = true;
3829 						continue;
3830 					}
3831 					break;
3832 				}
3833 				if (p[i+j] == 's')
3834 					break;
3835 				star = false;
3836 			}
3837 			j = 0;
3838 		}
3839 		/* If no %s found then just print normally */
3840 		if (!p[i])
3841 			break;
3842 
3843 		/* Copy up to the %s, and print that */
3844 		strncpy(iter->fmt, p, i);
3845 		iter->fmt[i] = '\0';
3846 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3847 
3848 		/*
3849 		 * If iter->seq is full, the above call no longer guarantees
3850 		 * that ap is in sync with fmt processing, and further calls
3851 		 * to va_arg() can return wrong positional arguments.
3852 		 *
3853 		 * Ensure that ap is no longer used in this case.
3854 		 */
3855 		if (iter->seq.full) {
3856 			p = "";
3857 			break;
3858 		}
3859 
3860 		if (star)
3861 			len = va_arg(ap, int);
3862 
3863 		/* The ap now points to the string data of the %s */
3864 		str = va_arg(ap, const char *);
3865 
3866 		/*
3867 		 * If you hit this warning, it is likely that the
3868 		 * trace event in question used %s on a string that
3869 		 * was saved at the time of the event, but may not be
3870 		 * around when the trace is read. Use __string(),
3871 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3872 		 * instead. See samples/trace_events/trace-events-sample.h
3873 		 * for reference.
3874 		 */
3875 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3876 			      "fmt: '%s' current_buffer: '%s'",
3877 			      fmt, show_buffer(&iter->seq))) {
3878 			int ret;
3879 
3880 			/* Try to safely read the string */
3881 			if (star) {
3882 				if (len + 1 > iter->fmt_size)
3883 					len = iter->fmt_size - 1;
3884 				if (len < 0)
3885 					len = 0;
3886 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3887 				iter->fmt[len] = 0;
3888 				star = false;
3889 			} else {
3890 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3891 								  iter->fmt_size);
3892 			}
3893 			if (ret < 0)
3894 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3895 			else
3896 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3897 						 str, iter->fmt);
3898 			str = "[UNSAFE-MEMORY]";
3899 			strcpy(iter->fmt, "%s");
3900 		} else {
3901 			strncpy(iter->fmt, p + i, j + 1);
3902 			iter->fmt[j+1] = '\0';
3903 		}
3904 		if (star)
3905 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3906 		else
3907 			trace_seq_printf(&iter->seq, iter->fmt, str);
3908 
3909 		p += i + j + 1;
3910 	}
3911  print:
3912 	if (*p)
3913 		trace_seq_vprintf(&iter->seq, p, ap);
3914 }
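
/*
 * Illustrative example of the unsafe case above: for a format such as
 * "dev=%s" whose string pointer fails the checks, the output line becomes
 * roughly "dev=(0x<addr>:<recovered bytes>)[UNSAFE-MEMORY]" and the
 * WARN_ONCE() fires, while a safe pointer is printed normally.
 */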
3915 
3916 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3917 {
3918 	const char *p, *new_fmt;
3919 	char *q;
3920 
3921 	if (WARN_ON_ONCE(!fmt))
3922 		return fmt;
3923 
3924 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3925 		return fmt;
3926 
3927 	p = fmt;
3928 	new_fmt = q = iter->fmt;
3929 	while (*p) {
3930 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3931 			if (!trace_iter_expand_format(iter))
3932 				return fmt;
3933 
3934 			q += iter->fmt - new_fmt;
3935 			new_fmt = iter->fmt;
3936 		}
3937 
3938 		*q++ = *p++;
3939 
3940 		/* Replace %p with %px */
3941 		if (p[-1] == '%') {
3942 			if (p[0] == '%') {
3943 				*q++ = *p++;
3944 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3945 				*q++ = *p++;
3946 				*q++ = 'x';
3947 			}
3948 		}
3949 	}
3950 	*q = '\0';
3951 
3952 	return new_fmt;
3953 }
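
/*
 * Illustrative example: with hash-ptr disabled, a format such as
 * "comm=%s ptr=%p sym=%pS" is rewritten to "comm=%s ptr=%px sym=%pS",
 * i.e. only a bare %p is converted; %% and the %p<X> extensions are
 * left untouched.
 */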
3954 
3955 #define STATIC_TEMP_BUF_SIZE	128
3956 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3957 
3958 /* Find the next real entry, without updating the iterator itself */
3959 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3960 					  int *ent_cpu, u64 *ent_ts)
3961 {
3962 	/* __find_next_entry will reset ent_size */
3963 	int ent_size = iter->ent_size;
3964 	struct trace_entry *entry;
3965 
3966 	/*
3967 	 * If called from ftrace_dump(), then the iter->temp buffer
3968 	 * will be the static_temp_buf and not created from kmalloc.
3969 	 * If the entry size is greater than the buffer, we cannot
3970 	 * save it. Just return NULL in that case. This is only
3971 	 * used to add markers when two consecutive events' time
3972 	 * stamps have a large delta. See trace_print_lat_context().
3973 	 */
3974 	if (iter->temp == static_temp_buf &&
3975 	    STATIC_TEMP_BUF_SIZE < ent_size)
3976 		return NULL;
3977 
3978 	/*
3979 	 * The __find_next_entry() may call peek_next_entry(), which may
3980 	 * call ring_buffer_peek() that may make the contents of iter->ent
3981 	 * undefined. Need to copy iter->ent now.
3982 	 */
3983 	if (iter->ent && iter->ent != iter->temp) {
3984 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3985 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3986 			void *temp;
3987 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3988 			if (!temp)
3989 				return NULL;
3990 			kfree(iter->temp);
3991 			iter->temp = temp;
3992 			iter->temp_size = iter->ent_size;
3993 		}
3994 		memcpy(iter->temp, iter->ent, iter->ent_size);
3995 		iter->ent = iter->temp;
3996 	}
3997 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3998 	/* Put back the original ent_size */
3999 	iter->ent_size = ent_size;
4000 
4001 	return entry;
4002 }
4003 
4004 /* Find the next real entry, and increment the iterator to the next entry */
4005 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4006 {
4007 	iter->ent = __find_next_entry(iter, &iter->cpu,
4008 				      &iter->lost_events, &iter->ts);
4009 
4010 	if (iter->ent)
4011 		trace_iterator_increment(iter);
4012 
4013 	return iter->ent ? iter : NULL;
4014 }
4015 
4016 static void trace_consume(struct trace_iterator *iter)
4017 {
4018 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4019 			    &iter->lost_events);
4020 }
4021 
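/*
 * seq_file ->next() callback: advance the trace iterator to the entry at
 * position *pos. The walk is forward only; if *pos is already behind
 * iter->idx, NULL is returned.
 */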
4022 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4023 {
4024 	struct trace_iterator *iter = m->private;
4025 	int i = (int)*pos;
4026 	void *ent;
4027 
4028 	WARN_ON_ONCE(iter->leftover);
4029 
4030 	(*pos)++;
4031 
4032 	/* can't go backwards */
4033 	if (iter->idx > i)
4034 		return NULL;
4035 
4036 	if (iter->idx < 0)
4037 		ent = trace_find_next_entry_inc(iter);
4038 	else
4039 		ent = iter;
4040 
4041 	while (ent && iter->idx < i)
4042 		ent = trace_find_next_entry_inc(iter);
4043 
4044 	iter->pos = *pos;
4045 
4046 	return ent;
4047 }
4048 
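/*
 * Reset the ring buffer iterator for @cpu and skip any entries that are
 * older than the buffer's time_start, recording how many were skipped.
 */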
4049 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4050 {
4051 	struct ring_buffer_iter *buf_iter;
4052 	unsigned long entries = 0;
4053 	u64 ts;
4054 
4055 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4056 
4057 	buf_iter = trace_buffer_iter(iter, cpu);
4058 	if (!buf_iter)
4059 		return;
4060 
4061 	ring_buffer_iter_reset(buf_iter);
4062 
4063 	/*
4064 	 * With the max latency tracers it is possible that a reset never
4065 	 * took place on a CPU. This is evident when a timestamp is before
4066 	 * the start of the buffer.
4067 	 */
4068 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4069 		if (ts >= iter->array_buffer->time_start)
4070 			break;
4071 		entries++;
4072 		ring_buffer_iter_advance(buf_iter);
4073 	}
4074 
4075 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4076 }
4077 
4078 /*
4079  * The current tracer is copied here to avoid having to hold a
4080  * global lock for the whole read.
4081  */
4082 static void *s_start(struct seq_file *m, loff_t *pos)
4083 {
4084 	struct trace_iterator *iter = m->private;
4085 	struct trace_array *tr = iter->tr;
4086 	int cpu_file = iter->cpu_file;
4087 	void *p = NULL;
4088 	loff_t l = 0;
4089 	int cpu;
4090 
4091 	/*
4092 	 * copy the tracer to avoid using a global lock all around.
4093 	 * iter->trace is a copy of current_trace; the name pointer may be
4094 	 * compared instead of using strcmp(), as iter->trace->name will
4095 	 * point to the same string as current_trace->name.
4096 	 */
4097 	mutex_lock(&trace_types_lock);
4098 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4099 		*iter->trace = *tr->current_trace;
4100 	mutex_unlock(&trace_types_lock);
4101 
4102 #ifdef CONFIG_TRACER_MAX_TRACE
4103 	if (iter->snapshot && iter->trace->use_max_tr)
4104 		return ERR_PTR(-EBUSY);
4105 #endif
4106 
4107 	if (*pos != iter->pos) {
4108 		iter->ent = NULL;
4109 		iter->cpu = 0;
4110 		iter->idx = -1;
4111 
4112 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4113 			for_each_tracing_cpu(cpu)
4114 				tracing_iter_reset(iter, cpu);
4115 		} else
4116 			tracing_iter_reset(iter, cpu_file);
4117 
4118 		iter->leftover = 0;
4119 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4120 			;
4121 
4122 	} else {
4123 		/*
4124 		 * If we overflowed the seq_file before, then we want
4125 		 * to just reuse the trace_seq buffer again.
4126 		 */
4127 		if (iter->leftover)
4128 			p = iter;
4129 		else {
4130 			l = *pos - 1;
4131 			p = s_next(m, p, &l);
4132 		}
4133 	}
4134 
4135 	trace_event_read_lock();
4136 	trace_access_lock(cpu_file);
4137 	return p;
4138 }
4139 
4140 static void s_stop(struct seq_file *m, void *p)
4141 {
4142 	struct trace_iterator *iter = m->private;
4143 
4144 #ifdef CONFIG_TRACER_MAX_TRACE
4145 	if (iter->snapshot && iter->trace->use_max_tr)
4146 		return;
4147 #endif
4148 
4149 	trace_access_unlock(iter->cpu_file);
4150 	trace_event_read_unlock();
4151 }
4152 
4153 static void
4154 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4155 		      unsigned long *entries, int cpu)
4156 {
4157 	unsigned long count;
4158 
4159 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4160 	/*
4161 	 * If this buffer has skipped entries, then we hold all
4162 	 * entries for the trace and we need to ignore the
4163 	 * ones before the time stamp.
4164 	 */
4165 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4166 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4167 		/* total is the same as the entries */
4168 		*total = count;
4169 	} else
4170 		*total = count +
4171 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4172 	*entries = count;
4173 }
4174 
4175 static void
4176 get_total_entries(struct array_buffer *buf,
4177 		  unsigned long *total, unsigned long *entries)
4178 {
4179 	unsigned long t, e;
4180 	int cpu;
4181 
4182 	*total = 0;
4183 	*entries = 0;
4184 
4185 	for_each_tracing_cpu(cpu) {
4186 		get_total_entries_cpu(buf, &t, &e, cpu);
4187 		*total += t;
4188 		*entries += e;
4189 	}
4190 }
4191 
4192 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4193 {
4194 	unsigned long total, entries;
4195 
4196 	if (!tr)
4197 		tr = &global_trace;
4198 
4199 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4200 
4201 	return entries;
4202 }
4203 
4204 unsigned long trace_total_entries(struct trace_array *tr)
4205 {
4206 	unsigned long total, entries;
4207 
4208 	if (!tr)
4209 		tr = &global_trace;
4210 
4211 	get_total_entries(&tr->array_buffer, &total, &entries);
4212 
4213 	return entries;
4214 }
4215 
4216 static void print_lat_help_header(struct seq_file *m)
4217 {
4218 	seq_puts(m, "#                    _------=> CPU#            \n"
4219 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4220 		    "#                  | / _----=> need-resched    \n"
4221 		    "#                  || / _---=> hardirq/softirq \n"
4222 		    "#                  ||| / _--=> preempt-depth   \n"
4223 		    "#                  |||| / _-=> migrate-disable \n"
4224 		    "#                  ||||| /     delay           \n"
4225 		    "#  cmd     pid     |||||| time  |   caller     \n"
4226 		    "#     \\   /        ||||||  \\    |    /       \n");
4227 }
4228 
4229 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4230 {
4231 	unsigned long total;
4232 	unsigned long entries;
4233 
4234 	get_total_entries(buf, &total, &entries);
4235 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4236 		   entries, total, num_online_cpus());
4237 	seq_puts(m, "#\n");
4238 }
4239 
4240 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4241 				   unsigned int flags)
4242 {
4243 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4244 
4245 	print_event_info(buf, m);
4246 
4247 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4248 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4249 }
4250 
4251 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4252 				       unsigned int flags)
4253 {
4254 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4255 	static const char space[] = "            ";
4256 	int prec = tgid ? 12 : 2;
4257 
4258 	print_event_info(buf, m);
4259 
4260 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4261 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4262 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4263 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4264 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4265 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4266 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4267 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4268 }
4269 
4270 void
4271 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4272 {
4273 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4274 	struct array_buffer *buf = iter->array_buffer;
4275 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4276 	struct tracer *type = iter->trace;
4277 	unsigned long entries;
4278 	unsigned long total;
4279 	const char *name = type->name;
4280 
4281 	get_total_entries(buf, &total, &entries);
4282 
4283 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4284 		   name, UTS_RELEASE);
4285 	seq_puts(m, "# -----------------------------------"
4286 		 "---------------------------------\n");
4287 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4288 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4289 		   nsecs_to_usecs(data->saved_latency),
4290 		   entries,
4291 		   total,
4292 		   buf->cpu,
4293 		   preempt_model_none()      ? "server" :
4294 		   preempt_model_voluntary() ? "desktop" :
4295 		   preempt_model_full()      ? "preempt" :
4296 		   preempt_model_rt()        ? "preempt_rt" :
4297 		   "unknown",
4298 		   /* These are reserved for later use */
4299 		   0, 0, 0, 0);
4300 #ifdef CONFIG_SMP
4301 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4302 #else
4303 	seq_puts(m, ")\n");
4304 #endif
4305 	seq_puts(m, "#    -----------------\n");
4306 	seq_printf(m, "#    | task: %.16s-%d "
4307 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4308 		   data->comm, data->pid,
4309 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4310 		   data->policy, data->rt_priority);
4311 	seq_puts(m, "#    -----------------\n");
4312 
4313 	if (data->critical_start) {
4314 		seq_puts(m, "#  => started at: ");
4315 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4316 		trace_print_seq(m, &iter->seq);
4317 		seq_puts(m, "\n#  => ended at:   ");
4318 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4319 		trace_print_seq(m, &iter->seq);
4320 		seq_puts(m, "\n#\n");
4321 	}
4322 
4323 	seq_puts(m, "#\n");
4324 }
4325 
4326 static void test_cpu_buff_start(struct trace_iterator *iter)
4327 {
4328 	struct trace_seq *s = &iter->seq;
4329 	struct trace_array *tr = iter->tr;
4330 
4331 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4332 		return;
4333 
4334 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4335 		return;
4336 
4337 	if (cpumask_available(iter->started) &&
4338 	    cpumask_test_cpu(iter->cpu, iter->started))
4339 		return;
4340 
4341 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4342 		return;
4343 
4344 	if (cpumask_available(iter->started))
4345 		cpumask_set_cpu(iter->cpu, iter->started);
4346 
4347 	/* Don't print started cpu buffer for the first entry of the trace */
4348 	if (iter->idx > 1)
4349 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4350 				iter->cpu);
4351 }
4352 
4353 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4354 {
4355 	struct trace_array *tr = iter->tr;
4356 	struct trace_seq *s = &iter->seq;
4357 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4358 	struct trace_entry *entry;
4359 	struct trace_event *event;
4360 
4361 	entry = iter->ent;
4362 
4363 	test_cpu_buff_start(iter);
4364 
4365 	event = ftrace_find_event(entry->type);
4366 
4367 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4368 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4369 			trace_print_lat_context(iter);
4370 		else
4371 			trace_print_context(iter);
4372 	}
4373 
4374 	if (trace_seq_has_overflowed(s))
4375 		return TRACE_TYPE_PARTIAL_LINE;
4376 
4377 	if (event)
4378 		return event->funcs->trace(iter, sym_flags, event);
4379 
4380 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4381 
4382 	return trace_handle_return(s);
4383 }
4384 
4385 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4386 {
4387 	struct trace_array *tr = iter->tr;
4388 	struct trace_seq *s = &iter->seq;
4389 	struct trace_entry *entry;
4390 	struct trace_event *event;
4391 
4392 	entry = iter->ent;
4393 
4394 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4395 		trace_seq_printf(s, "%d %d %llu ",
4396 				 entry->pid, iter->cpu, iter->ts);
4397 
4398 	if (trace_seq_has_overflowed(s))
4399 		return TRACE_TYPE_PARTIAL_LINE;
4400 
4401 	event = ftrace_find_event(entry->type);
4402 	if (event)
4403 		return event->funcs->raw(iter, 0, event);
4404 
4405 	trace_seq_printf(s, "%d ?\n", entry->type);
4406 
4407 	return trace_handle_return(s);
4408 }
4409 
4410 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4411 {
4412 	struct trace_array *tr = iter->tr;
4413 	struct trace_seq *s = &iter->seq;
4414 	unsigned char newline = '\n';
4415 	struct trace_entry *entry;
4416 	struct trace_event *event;
4417 
4418 	entry = iter->ent;
4419 
4420 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4421 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4422 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4423 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4424 		if (trace_seq_has_overflowed(s))
4425 			return TRACE_TYPE_PARTIAL_LINE;
4426 	}
4427 
4428 	event = ftrace_find_event(entry->type);
4429 	if (event) {
4430 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4431 		if (ret != TRACE_TYPE_HANDLED)
4432 			return ret;
4433 	}
4434 
4435 	SEQ_PUT_FIELD(s, newline);
4436 
4437 	return trace_handle_return(s);
4438 }
4439 
4440 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4441 {
4442 	struct trace_array *tr = iter->tr;
4443 	struct trace_seq *s = &iter->seq;
4444 	struct trace_entry *entry;
4445 	struct trace_event *event;
4446 
4447 	entry = iter->ent;
4448 
4449 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4450 		SEQ_PUT_FIELD(s, entry->pid);
4451 		SEQ_PUT_FIELD(s, iter->cpu);
4452 		SEQ_PUT_FIELD(s, iter->ts);
4453 		if (trace_seq_has_overflowed(s))
4454 			return TRACE_TYPE_PARTIAL_LINE;
4455 	}
4456 
4457 	event = ftrace_find_event(entry->type);
4458 	return event ? event->funcs->binary(iter, 0, event) :
4459 		TRACE_TYPE_HANDLED;
4460 }
4461 
4462 int trace_empty(struct trace_iterator *iter)
4463 {
4464 	struct ring_buffer_iter *buf_iter;
4465 	int cpu;
4466 
4467 	/* If we are looking at one CPU buffer, only check that one */
4468 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4469 		cpu = iter->cpu_file;
4470 		buf_iter = trace_buffer_iter(iter, cpu);
4471 		if (buf_iter) {
4472 			if (!ring_buffer_iter_empty(buf_iter))
4473 				return 0;
4474 		} else {
4475 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476 				return 0;
4477 		}
4478 		return 1;
4479 	}
4480 
4481 	for_each_tracing_cpu(cpu) {
4482 		buf_iter = trace_buffer_iter(iter, cpu);
4483 		if (buf_iter) {
4484 			if (!ring_buffer_iter_empty(buf_iter))
4485 				return 0;
4486 		} else {
4487 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4488 				return 0;
4489 		}
4490 	}
4491 
4492 	return 1;
4493 }
4494 
4495 /*  Called with trace_event_read_lock() held. */
4496 enum print_line_t print_trace_line(struct trace_iterator *iter)
4497 {
4498 	struct trace_array *tr = iter->tr;
4499 	unsigned long trace_flags = tr->trace_flags;
4500 	enum print_line_t ret;
4501 
4502 	if (iter->lost_events) {
4503 		if (iter->lost_events == (unsigned long)-1)
4504 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4505 					 iter->cpu);
4506 		else
4507 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4508 					 iter->cpu, iter->lost_events);
4509 		if (trace_seq_has_overflowed(&iter->seq))
4510 			return TRACE_TYPE_PARTIAL_LINE;
4511 	}
4512 
4513 	if (iter->trace && iter->trace->print_line) {
4514 		ret = iter->trace->print_line(iter);
4515 		if (ret != TRACE_TYPE_UNHANDLED)
4516 			return ret;
4517 	}
4518 
4519 	if (iter->ent->type == TRACE_BPUTS &&
4520 			trace_flags & TRACE_ITER_PRINTK &&
4521 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4522 		return trace_print_bputs_msg_only(iter);
4523 
4524 	if (iter->ent->type == TRACE_BPRINT &&
4525 			trace_flags & TRACE_ITER_PRINTK &&
4526 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4527 		return trace_print_bprintk_msg_only(iter);
4528 
4529 	if (iter->ent->type == TRACE_PRINT &&
4530 			trace_flags & TRACE_ITER_PRINTK &&
4531 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532 		return trace_print_printk_msg_only(iter);
4533 
4534 	if (trace_flags & TRACE_ITER_BIN)
4535 		return print_bin_fmt(iter);
4536 
4537 	if (trace_flags & TRACE_ITER_HEX)
4538 		return print_hex_fmt(iter);
4539 
4540 	if (trace_flags & TRACE_ITER_RAW)
4541 		return print_raw_fmt(iter);
4542 
4543 	return print_trace_fmt(iter);
4544 }
4545 
4546 void trace_latency_header(struct seq_file *m)
4547 {
4548 	struct trace_iterator *iter = m->private;
4549 	struct trace_array *tr = iter->tr;
4550 
4551 	/* print nothing if the buffers are empty */
4552 	if (trace_empty(iter))
4553 		return;
4554 
4555 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4556 		print_trace_header(m, iter);
4557 
4558 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4559 		print_lat_help_header(m);
4560 }
4561 
4562 void trace_default_header(struct seq_file *m)
4563 {
4564 	struct trace_iterator *iter = m->private;
4565 	struct trace_array *tr = iter->tr;
4566 	unsigned long trace_flags = tr->trace_flags;
4567 
4568 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4569 		return;
4570 
4571 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4572 		/* print nothing if the buffers are empty */
4573 		if (trace_empty(iter))
4574 			return;
4575 		print_trace_header(m, iter);
4576 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4577 			print_lat_help_header(m);
4578 	} else {
4579 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4580 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4581 				print_func_help_header_irq(iter->array_buffer,
4582 							   m, trace_flags);
4583 			else
4584 				print_func_help_header(iter->array_buffer, m,
4585 						       trace_flags);
4586 		}
4587 	}
4588 }
4589 
4590 static void test_ftrace_alive(struct seq_file *m)
4591 {
4592 	if (!ftrace_is_dead())
4593 		return;
4594 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4595 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4596 }
4597 
4598 #ifdef CONFIG_TRACER_MAX_TRACE
4599 static void show_snapshot_main_help(struct seq_file *m)
4600 {
4601 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4602 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4603 		    "#                      Takes a snapshot of the main buffer.\n"
4604 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4605 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4606 		    "#                       is not a '0' or '1')\n");
4607 }
4608 
4609 static void show_snapshot_percpu_help(struct seq_file *m)
4610 {
4611 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4612 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4613 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4614 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4615 #else
4616 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4617 		    "#                     Must use main snapshot file to allocate.\n");
4618 #endif
4619 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4620 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4621 		    "#                       is not a '0' or '1')\n");
4622 }
4623 
4624 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4625 {
4626 	if (iter->tr->allocated_snapshot)
4627 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4628 	else
4629 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4630 
4631 	seq_puts(m, "# Snapshot commands:\n");
4632 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4633 		show_snapshot_main_help(m);
4634 	else
4635 		show_snapshot_percpu_help(m);
4636 }
4637 #else
4638 /* Should never be called */
4639 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4640 #endif
4641 
4642 static int s_show(struct seq_file *m, void *v)
4643 {
4644 	struct trace_iterator *iter = v;
4645 	int ret;
4646 
4647 	if (iter->ent == NULL) {
4648 		if (iter->tr) {
4649 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4650 			seq_puts(m, "#\n");
4651 			test_ftrace_alive(m);
4652 		}
4653 		if (iter->snapshot && trace_empty(iter))
4654 			print_snapshot_help(m, iter);
4655 		else if (iter->trace && iter->trace->print_header)
4656 			iter->trace->print_header(m);
4657 		else
4658 			trace_default_header(m);
4659 
4660 	} else if (iter->leftover) {
4661 		/*
4662 		 * If we filled the seq_file buffer earlier, we
4663 		 * want to just show it now.
4664 		 */
4665 		ret = trace_print_seq(m, &iter->seq);
4666 
4667 		/* ret should this time be zero, but you never know */
4668 		iter->leftover = ret;
4669 
4670 	} else {
4671 		print_trace_line(iter);
4672 		ret = trace_print_seq(m, &iter->seq);
4673 		/*
4674 		 * If we overflow the seq_file buffer, then it will
4675 		 * ask us for this data again at start up.
4676 		 * Use that instead.
4677 		 *  ret is 0 if seq_file write succeeded.
4678 		 *        -1 otherwise.
4679 		 */
4680 		iter->leftover = ret;
4681 	}
4682 
4683 	return 0;
4684 }
4685 
4686 /*
4687  * Should be used after trace_array_get(); trace_types_lock
4688  * ensures that i_cdev was already initialized.
4689  */
4690 static inline int tracing_get_cpu(struct inode *inode)
4691 {
4692 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4693 		return (long)inode->i_cdev - 1;
4694 	return RING_BUFFER_ALL_CPUS;
4695 }
4696 
4697 static const struct seq_operations tracer_seq_ops = {
4698 	.start		= s_start,
4699 	.next		= s_next,
4700 	.stop		= s_stop,
4701 	.show		= s_show,
4702 };
4703 
4704 static struct trace_iterator *
4705 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4706 {
4707 	struct trace_array *tr = inode->i_private;
4708 	struct trace_iterator *iter;
4709 	int cpu;
4710 
4711 	if (tracing_disabled)
4712 		return ERR_PTR(-ENODEV);
4713 
4714 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4715 	if (!iter)
4716 		return ERR_PTR(-ENOMEM);
4717 
4718 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4719 				    GFP_KERNEL);
4720 	if (!iter->buffer_iter)
4721 		goto release;
4722 
4723 	/*
4724 	 * trace_find_next_entry() may need to save off iter->ent.
4725 	 * It will place it into the iter->temp buffer. As most
4726 	 * events are less than 128, allocate a buffer of that size.
4727 	 * If one is greater, then trace_find_next_entry() will
4728 	 * allocate a new buffer to adjust for the bigger iter->ent.
4729 	 * It's not critical if it fails to get allocated here.
4730 	 */
4731 	iter->temp = kmalloc(128, GFP_KERNEL);
4732 	if (iter->temp)
4733 		iter->temp_size = 128;
4734 
4735 	/*
4736 	 * trace_event_printf() may need to modify the given format
4737 	 * string to replace %p with %px so that it shows the real address
4738 	 * instead of a hashed value. However, that is only needed for
4739 	 * event tracing; other tracers may not need it. Defer the
4740 	 * allocation until it is needed.
4741 	 */
4742 	iter->fmt = NULL;
4743 	iter->fmt_size = 0;
4744 
4745 	/*
4746 	 * We make a copy of the current tracer to avoid concurrent
4747 	 * changes on it while we are reading.
4748 	 */
4749 	mutex_lock(&trace_types_lock);
4750 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4751 	if (!iter->trace)
4752 		goto fail;
4753 
4754 	*iter->trace = *tr->current_trace;
4755 
4756 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4757 		goto fail;
4758 
4759 	iter->tr = tr;
4760 
4761 #ifdef CONFIG_TRACER_MAX_TRACE
4762 	/* Currently only the top directory has a snapshot */
4763 	if (tr->current_trace->print_max || snapshot)
4764 		iter->array_buffer = &tr->max_buffer;
4765 	else
4766 #endif
4767 		iter->array_buffer = &tr->array_buffer;
4768 	iter->snapshot = snapshot;
4769 	iter->pos = -1;
4770 	iter->cpu_file = tracing_get_cpu(inode);
4771 	mutex_init(&iter->mutex);
4772 
4773 	/* Notify the tracer early; before we stop tracing. */
4774 	if (iter->trace->open)
4775 		iter->trace->open(iter);
4776 
4777 	/* Annotate start of buffers if we had overruns */
4778 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4779 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4780 
4781 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4782 	if (trace_clocks[tr->clock_id].in_ns)
4783 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4784 
4785 	/*
4786 	 * If pause-on-trace is enabled, then stop the trace while
4787 	 * dumping, unless this is the "snapshot" file
4788 	 */
4789 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4790 		tracing_stop_tr(tr);
4791 
4792 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4793 		for_each_tracing_cpu(cpu) {
4794 			iter->buffer_iter[cpu] =
4795 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4796 							 cpu, GFP_KERNEL);
4797 		}
4798 		ring_buffer_read_prepare_sync();
4799 		for_each_tracing_cpu(cpu) {
4800 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4801 			tracing_iter_reset(iter, cpu);
4802 		}
4803 	} else {
4804 		cpu = iter->cpu_file;
4805 		iter->buffer_iter[cpu] =
4806 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4807 						 cpu, GFP_KERNEL);
4808 		ring_buffer_read_prepare_sync();
4809 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4810 		tracing_iter_reset(iter, cpu);
4811 	}
4812 
4813 	mutex_unlock(&trace_types_lock);
4814 
4815 	return iter;
4816 
4817  fail:
4818 	mutex_unlock(&trace_types_lock);
4819 	kfree(iter->trace);
4820 	kfree(iter->temp);
4821 	kfree(iter->buffer_iter);
4822 release:
4823 	seq_release_private(inode, file);
4824 	return ERR_PTR(-ENOMEM);
4825 }
4826 
4827 int tracing_open_generic(struct inode *inode, struct file *filp)
4828 {
4829 	int ret;
4830 
4831 	ret = tracing_check_open_get_tr(NULL);
4832 	if (ret)
4833 		return ret;
4834 
4835 	filp->private_data = inode->i_private;
4836 	return 0;
4837 }
4838 
4839 bool tracing_is_disabled(void)
4840 {
4841 	return tracing_disabled;
4842 }
4843 
4844 /*
4845  * Open and update trace_array ref count.
4846  * Must have the current trace_array passed to it.
4847  */
4848 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4849 {
4850 	struct trace_array *tr = inode->i_private;
4851 	int ret;
4852 
4853 	ret = tracing_check_open_get_tr(tr);
4854 	if (ret)
4855 		return ret;
4856 
4857 	filp->private_data = inode->i_private;
4858 
4859 	return 0;
4860 }
4861 
4862 static int tracing_mark_open(struct inode *inode, struct file *filp)
4863 {
4864 	stream_open(inode, filp);
4865 	return tracing_open_generic_tr(inode, filp);
4866 }
4867 
4868 static int tracing_release(struct inode *inode, struct file *file)
4869 {
4870 	struct trace_array *tr = inode->i_private;
4871 	struct seq_file *m = file->private_data;
4872 	struct trace_iterator *iter;
4873 	int cpu;
4874 
4875 	if (!(file->f_mode & FMODE_READ)) {
4876 		trace_array_put(tr);
4877 		return 0;
4878 	}
4879 
4880 	/* Writes do not use seq_file */
4881 	iter = m->private;
4882 	mutex_lock(&trace_types_lock);
4883 
4884 	for_each_tracing_cpu(cpu) {
4885 		if (iter->buffer_iter[cpu])
4886 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4887 	}
4888 
4889 	if (iter->trace && iter->trace->close)
4890 		iter->trace->close(iter);
4891 
4892 	if (!iter->snapshot && tr->stop_count)
4893 		/* reenable tracing if it was previously enabled */
4894 		tracing_start_tr(tr);
4895 
4896 	__trace_array_put(tr);
4897 
4898 	mutex_unlock(&trace_types_lock);
4899 
4900 	mutex_destroy(&iter->mutex);
4901 	free_cpumask_var(iter->started);
4902 	kfree(iter->fmt);
4903 	kfree(iter->temp);
4904 	kfree(iter->trace);
4905 	kfree(iter->buffer_iter);
4906 	seq_release_private(inode, file);
4907 
4908 	return 0;
4909 }
4910 
4911 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4912 {
4913 	struct trace_array *tr = inode->i_private;
4914 
4915 	trace_array_put(tr);
4916 	return 0;
4917 }
4918 
4919 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4920 {
4921 	struct trace_array *tr = inode->i_private;
4922 
4923 	trace_array_put(tr);
4924 
4925 	return single_release(inode, file);
4926 }
4927 
4928 static int tracing_open(struct inode *inode, struct file *file)
4929 {
4930 	struct trace_array *tr = inode->i_private;
4931 	struct trace_iterator *iter;
4932 	int ret;
4933 
4934 	ret = tracing_check_open_get_tr(tr);
4935 	if (ret)
4936 		return ret;
4937 
4938 	/* If this file was open for write, then erase contents */
4939 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4940 		int cpu = tracing_get_cpu(inode);
4941 		struct array_buffer *trace_buf = &tr->array_buffer;
4942 
4943 #ifdef CONFIG_TRACER_MAX_TRACE
4944 		if (tr->current_trace->print_max)
4945 			trace_buf = &tr->max_buffer;
4946 #endif
4947 
4948 		if (cpu == RING_BUFFER_ALL_CPUS)
4949 			tracing_reset_online_cpus(trace_buf);
4950 		else
4951 			tracing_reset_cpu(trace_buf, cpu);
4952 	}
4953 
4954 	if (file->f_mode & FMODE_READ) {
4955 		iter = __tracing_open(inode, file, false);
4956 		if (IS_ERR(iter))
4957 			ret = PTR_ERR(iter);
4958 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4959 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4960 	}
4961 
4962 	if (ret < 0)
4963 		trace_array_put(tr);
4964 
4965 	return ret;
4966 }
4967 
4968 /*
4969  * Some tracers are not suitable for instance buffers.
4970  * A tracer is always available for the global array (top level),
4971  * and for an instance only if it explicitly states that it is.
4972  */
4973 static bool
4974 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4975 {
4976 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4977 }
4978 
4979 /* Find the next tracer that this trace array may use */
4980 static struct tracer *
4981 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4982 {
4983 	while (t && !trace_ok_for_array(t, tr))
4984 		t = t->next;
4985 
4986 	return t;
4987 }
4988 
4989 static void *
4990 t_next(struct seq_file *m, void *v, loff_t *pos)
4991 {
4992 	struct trace_array *tr = m->private;
4993 	struct tracer *t = v;
4994 
4995 	(*pos)++;
4996 
4997 	if (t)
4998 		t = get_tracer_for_array(tr, t->next);
4999 
5000 	return t;
5001 }
5002 
5003 static void *t_start(struct seq_file *m, loff_t *pos)
5004 {
5005 	struct trace_array *tr = m->private;
5006 	struct tracer *t;
5007 	loff_t l = 0;
5008 
5009 	mutex_lock(&trace_types_lock);
5010 
5011 	t = get_tracer_for_array(tr, trace_types);
5012 	for (; t && l < *pos; t = t_next(m, t, &l))
5013 			;
5014 
5015 	return t;
5016 }
5017 
5018 static void t_stop(struct seq_file *m, void *p)
5019 {
5020 	mutex_unlock(&trace_types_lock);
5021 }
5022 
5023 static int t_show(struct seq_file *m, void *v)
5024 {
5025 	struct tracer *t = v;
5026 
5027 	if (!t)
5028 		return 0;
5029 
5030 	seq_puts(m, t->name);
5031 	if (t->next)
5032 		seq_putc(m, ' ');
5033 	else
5034 		seq_putc(m, '\n');
5035 
5036 	return 0;
5037 }
5038 
5039 static const struct seq_operations show_traces_seq_ops = {
5040 	.start		= t_start,
5041 	.next		= t_next,
5042 	.stop		= t_stop,
5043 	.show		= t_show,
5044 };
5045 
5046 static int show_traces_open(struct inode *inode, struct file *file)
5047 {
5048 	struct trace_array *tr = inode->i_private;
5049 	struct seq_file *m;
5050 	int ret;
5051 
5052 	ret = tracing_check_open_get_tr(tr);
5053 	if (ret)
5054 		return ret;
5055 
5056 	ret = seq_open(file, &show_traces_seq_ops);
5057 	if (ret) {
5058 		trace_array_put(tr);
5059 		return ret;
5060 	}
5061 
5062 	m = file->private_data;
5063 	m->private = tr;
5064 
5065 	return 0;
5066 }
5067 
5068 static int show_traces_release(struct inode *inode, struct file *file)
5069 {
5070 	struct trace_array *tr = inode->i_private;
5071 
5072 	trace_array_put(tr);
5073 	return seq_release(inode, file);
5074 }
5075 
5076 static ssize_t
5077 tracing_write_stub(struct file *filp, const char __user *ubuf,
5078 		   size_t count, loff_t *ppos)
5079 {
5080 	return count;
5081 }
5082 
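/*
 * Common llseek for tracing files: seek through seq_file when the file
 * was opened for reading, otherwise just reset the position to zero.
 */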
5083 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5084 {
5085 	int ret;
5086 
5087 	if (file->f_mode & FMODE_READ)
5088 		ret = seq_lseek(file, offset, whence);
5089 	else
5090 		file->f_pos = ret = 0;
5091 
5092 	return ret;
5093 }
5094 
5095 static const struct file_operations tracing_fops = {
5096 	.open		= tracing_open,
5097 	.read		= seq_read,
5098 	.write		= tracing_write_stub,
5099 	.llseek		= tracing_lseek,
5100 	.release	= tracing_release,
5101 };
5102 
5103 static const struct file_operations show_traces_fops = {
5104 	.open		= show_traces_open,
5105 	.read		= seq_read,
5106 	.llseek		= seq_lseek,
5107 	.release	= show_traces_release,
5108 };
5109 
5110 static ssize_t
5111 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5112 		     size_t count, loff_t *ppos)
5113 {
5114 	struct trace_array *tr = file_inode(filp)->i_private;
5115 	char *mask_str;
5116 	int len;
5117 
5118 	len = snprintf(NULL, 0, "%*pb\n",
5119 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5120 	mask_str = kmalloc(len, GFP_KERNEL);
5121 	if (!mask_str)
5122 		return -ENOMEM;
5123 
5124 	len = snprintf(mask_str, len, "%*pb\n",
5125 		       cpumask_pr_args(tr->tracing_cpumask));
5126 	if (len >= count) {
5127 		count = -EINVAL;
5128 		goto out_err;
5129 	}
5130 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5131 
5132 out_err:
5133 	kfree(mask_str);
5134 
5135 	return count;
5136 }
5137 
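/**
 * tracing_set_cpumask - set which CPUs the trace array records on
 * @tr: The trace array to update
 * @tracing_cpumask_new: The new mask of CPUs to trace
 *
 * CPUs cleared in the new mask have their per-CPU recording disabled and
 * CPUs that become set have it re-enabled, then the new mask is copied
 * into @tr->tracing_cpumask. Returns 0 on success or -EINVAL if @tr is NULL.
 */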
5138 int tracing_set_cpumask(struct trace_array *tr,
5139 			cpumask_var_t tracing_cpumask_new)
5140 {
5141 	int cpu;
5142 
5143 	if (!tr)
5144 		return -EINVAL;
5145 
5146 	local_irq_disable();
5147 	arch_spin_lock(&tr->max_lock);
5148 	for_each_tracing_cpu(cpu) {
5149 		/*
5150 		 * Increase/decrease the disabled counter if we are
5151 		 * about to flip a bit in the cpumask:
5152 		 */
5153 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5154 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5155 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5156 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5157 		}
5158 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5159 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5160 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5161 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5162 		}
5163 	}
5164 	arch_spin_unlock(&tr->max_lock);
5165 	local_irq_enable();
5166 
5167 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5168 
5169 	return 0;
5170 }
5171 
5172 static ssize_t
5173 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5174 		      size_t count, loff_t *ppos)
5175 {
5176 	struct trace_array *tr = file_inode(filp)->i_private;
5177 	cpumask_var_t tracing_cpumask_new;
5178 	int err;
5179 
5180 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5181 		return -ENOMEM;
5182 
5183 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5184 	if (err)
5185 		goto err_free;
5186 
5187 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5188 	if (err)
5189 		goto err_free;
5190 
5191 	free_cpumask_var(tracing_cpumask_new);
5192 
5193 	return count;
5194 
5195 err_free:
5196 	free_cpumask_var(tracing_cpumask_new);
5197 
5198 	return err;
5199 }
5200 
5201 static const struct file_operations tracing_cpumask_fops = {
5202 	.open		= tracing_open_generic_tr,
5203 	.read		= tracing_cpumask_read,
5204 	.write		= tracing_cpumask_write,
5205 	.release	= tracing_release_generic_tr,
5206 	.llseek		= generic_file_llseek,
5207 };
5208 
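/*
 * List every global trace option and every option of the current tracer,
 * one per line, prefixed with "no" when the option is disabled (the same
 * form that can be written back to the trace_options file).
 */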
5209 static int tracing_trace_options_show(struct seq_file *m, void *v)
5210 {
5211 	struct tracer_opt *trace_opts;
5212 	struct trace_array *tr = m->private;
5213 	u32 tracer_flags;
5214 	int i;
5215 
5216 	mutex_lock(&trace_types_lock);
5217 	tracer_flags = tr->current_trace->flags->val;
5218 	trace_opts = tr->current_trace->flags->opts;
5219 
5220 	for (i = 0; trace_options[i]; i++) {
5221 		if (tr->trace_flags & (1 << i))
5222 			seq_printf(m, "%s\n", trace_options[i]);
5223 		else
5224 			seq_printf(m, "no%s\n", trace_options[i]);
5225 	}
5226 
5227 	for (i = 0; trace_opts[i].name; i++) {
5228 		if (tracer_flags & trace_opts[i].bit)
5229 			seq_printf(m, "%s\n", trace_opts[i].name);
5230 		else
5231 			seq_printf(m, "no%s\n", trace_opts[i].name);
5232 	}
5233 	mutex_unlock(&trace_types_lock);
5234 
5235 	return 0;
5236 }
5237 
5238 static int __set_tracer_option(struct trace_array *tr,
5239 			       struct tracer_flags *tracer_flags,
5240 			       struct tracer_opt *opts, int neg)
5241 {
5242 	struct tracer *trace = tracer_flags->trace;
5243 	int ret;
5244 
5245 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5246 	if (ret)
5247 		return ret;
5248 
5249 	if (neg)
5250 		tracer_flags->val &= ~opts->bit;
5251 	else
5252 		tracer_flags->val |= opts->bit;
5253 	return 0;
5254 }
5255 
5256 /* Try to assign a tracer specific option */
5257 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5258 {
5259 	struct tracer *trace = tr->current_trace;
5260 	struct tracer_flags *tracer_flags = trace->flags;
5261 	struct tracer_opt *opts = NULL;
5262 	int i;
5263 
5264 	for (i = 0; tracer_flags->opts[i].name; i++) {
5265 		opts = &tracer_flags->opts[i];
5266 
5267 		if (strcmp(cmp, opts->name) == 0)
5268 			return __set_tracer_option(tr, trace->flags, opts, neg);
5269 	}
5270 
5271 	return -EINVAL;
5272 }
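
/*
 * Example (illustrative, mirroring the "no" prefix convention described in
 * the readme text further below): writing an option name to trace_options
 * sets it, and prefixing it with "no" clears it, e.g.:
 *
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo nosym-offset > /sys/kernel/tracing/trace_options
 *
 * Option names that are not in trace_options[] fall through to
 * set_tracer_option() above and are handled by the current tracer.
 */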
5273 
5274 /* Some tracers require overwrite to stay enabled */
5275 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5276 {
5277 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5278 		return -1;
5279 
5280 	return 0;
5281 }
5282 
5283 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5284 {
5285 	int *map;
5286 
5287 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5288 	    (mask == TRACE_ITER_RECORD_CMD))
5289 		lockdep_assert_held(&event_mutex);
5290 
5291 	/* do nothing if flag is already set */
5292 	if (!!(tr->trace_flags & mask) == !!enabled)
5293 		return 0;
5294 
5295 	/* Give the tracer a chance to approve the change */
5296 	if (tr->current_trace->flag_changed)
5297 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5298 			return -EINVAL;
5299 
5300 	if (enabled)
5301 		tr->trace_flags |= mask;
5302 	else
5303 		tr->trace_flags &= ~mask;
5304 
5305 	if (mask == TRACE_ITER_RECORD_CMD)
5306 		trace_event_enable_cmd_record(enabled);
5307 
5308 	if (mask == TRACE_ITER_RECORD_TGID) {
5309 		if (!tgid_map) {
5310 			tgid_map_max = pid_max;
5311 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5312 				       GFP_KERNEL);
5313 
5314 			/*
5315 			 * Pairs with smp_load_acquire() in
5316 			 * trace_find_tgid_ptr() to ensure that if it observes
5317 			 * the tgid_map we just allocated then it also observes
5318 			 * the corresponding tgid_map_max value.
5319 			 */
5320 			smp_store_release(&tgid_map, map);
5321 		}
5322 		if (!tgid_map) {
5323 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5324 			return -ENOMEM;
5325 		}
5326 
5327 		trace_event_enable_tgid_record(enabled);
5328 	}
5329 
5330 	if (mask == TRACE_ITER_EVENT_FORK)
5331 		trace_event_follow_fork(tr, enabled);
5332 
5333 	if (mask == TRACE_ITER_FUNC_FORK)
5334 		ftrace_pid_follow_fork(tr, enabled);
5335 
5336 	if (mask == TRACE_ITER_OVERWRITE) {
5337 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5340 #endif
5341 	}
5342 
5343 	if (mask == TRACE_ITER_PRINTK) {
5344 		trace_printk_start_stop_comm(enabled);
5345 		trace_printk_control(enabled);
5346 	}
5347 
5348 	return 0;
5349 }
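
/*
 * A minimal sketch (assumed, not copied from this file) of the reader side
 * that the smp_store_release() above pairs with. trace_find_tgid_ptr() is
 * expected to do roughly:
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (unlikely(!map || pid > tgid_map_max))
 *		return NULL;
 *	return &map[pid];
 *
 * so that a reader that observes the newly allocated tgid_map also
 * observes the matching tgid_map_max.
 */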
5350 
5351 int trace_set_options(struct trace_array *tr, char *option)
5352 {
5353 	char *cmp;
5354 	int neg = 0;
5355 	int ret;
5356 	size_t orig_len = strlen(option);
5357 	int len;
5358 
5359 	cmp = strstrip(option);
5360 
5361 	len = str_has_prefix(cmp, "no");
5362 	if (len)
5363 		neg = 1;
5364 
5365 	cmp += len;
5366 
5367 	mutex_lock(&event_mutex);
5368 	mutex_lock(&trace_types_lock);
5369 
5370 	ret = match_string(trace_options, -1, cmp);
5371 	/* If no option could be set, test the specific tracer options */
5372 	if (ret < 0)
5373 		ret = set_tracer_option(tr, cmp, neg);
5374 	else
5375 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5376 
5377 	mutex_unlock(&trace_types_lock);
5378 	mutex_unlock(&event_mutex);
5379 
5380 	/*
5381 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5382 	 * turn it back into a space.
5383 	 */
5384 	if (orig_len > strlen(option))
5385 		option[strlen(option)] = ' ';
5386 
5387 	return ret;
5388 }
5389 
5390 static void __init apply_trace_boot_options(void)
5391 {
5392 	char *buf = trace_boot_options_buf;
5393 	char *option;
5394 
5395 	while (true) {
5396 		option = strsep(&buf, ",");
5397 
5398 		if (!option)
5399 			break;
5400 
5401 		if (*option)
5402 			trace_set_options(&global_trace, option);
5403 
5404 		/* Put back the comma to allow this to be called again */
5405 		if (buf)
5406 			*(buf - 1) = ',';
5407 	}
5408 }
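
/*
 * Example (illustrative): the boot-time option buffer parsed above is a
 * comma-separated list, so a kernel command line such as
 *
 *	trace_options=sym-offset,noprint-parent
 *
 * applies each option in turn via trace_set_options(). The comma is put
 * back after each iteration so the buffer can be parsed again later.
 */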
5409 
5410 static ssize_t
5411 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5412 			size_t cnt, loff_t *ppos)
5413 {
5414 	struct seq_file *m = filp->private_data;
5415 	struct trace_array *tr = m->private;
5416 	char buf[64];
5417 	int ret;
5418 
5419 	if (cnt >= sizeof(buf))
5420 		return -EINVAL;
5421 
5422 	if (copy_from_user(buf, ubuf, cnt))
5423 		return -EFAULT;
5424 
5425 	buf[cnt] = 0;
5426 
5427 	ret = trace_set_options(tr, buf);
5428 	if (ret < 0)
5429 		return ret;
5430 
5431 	*ppos += cnt;
5432 
5433 	return cnt;
5434 }
5435 
5436 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5437 {
5438 	struct trace_array *tr = inode->i_private;
5439 	int ret;
5440 
5441 	ret = tracing_check_open_get_tr(tr);
5442 	if (ret)
5443 		return ret;
5444 
5445 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5446 	if (ret < 0)
5447 		trace_array_put(tr);
5448 
5449 	return ret;
5450 }
5451 
5452 static const struct file_operations tracing_iter_fops = {
5453 	.open		= tracing_trace_options_open,
5454 	.read		= seq_read,
5455 	.llseek		= seq_lseek,
5456 	.release	= tracing_single_release_tr,
5457 	.write		= tracing_trace_options_write,
5458 };
5459 
5460 static const char readme_msg[] =
5461 	"tracing mini-HOWTO:\n\n"
5462 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5463 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5464 	" Important files:\n"
5465 	"  trace\t\t\t- The static contents of the buffer\n"
5466 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5467 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5468 	"  current_tracer\t- function and latency tracers\n"
5469 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5470 	"  error_log\t- error log for failed commands (that support it)\n"
5471 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5472 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5473 	"  trace_clock\t\t- change the clock used to order events\n"
5474 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5475 	"      global:   Synced across CPUs but slows tracing down.\n"
5476 	"     counter:   Not a clock, but just an increment\n"
5477 	"      uptime:   Jiffy counter from time of boot\n"
5478 	"        perf:   Same clock that perf events use\n"
5479 #ifdef CONFIG_X86_64
5480 	"     x86-tsc:   TSC cycle counter\n"
5481 #endif
5482 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5483 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5484 	"    absolute:   Absolute (standalone) timestamp\n"
5485 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5486 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5487 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5488 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5489 	"\t\t\t  Remove sub-buffer with rmdir\n"
5490 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5491 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5492 	"\t\t\t  option name\n"
5493 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5494 #ifdef CONFIG_DYNAMIC_FTRACE
5495 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5496 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5497 	"\t\t\t  functions\n"
5498 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5499 	"\t     modules: Can select a group via module\n"
5500 	"\t      Format: :mod:<module-name>\n"
5501 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5502 	"\t    triggers: a command to perform when function is hit\n"
5503 	"\t      Format: <function>:<trigger>[:count]\n"
5504 	"\t     trigger: traceon, traceoff\n"
5505 	"\t\t      enable_event:<system>:<event>\n"
5506 	"\t\t      disable_event:<system>:<event>\n"
5507 #ifdef CONFIG_STACKTRACE
5508 	"\t\t      stacktrace\n"
5509 #endif
5510 #ifdef CONFIG_TRACER_SNAPSHOT
5511 	"\t\t      snapshot\n"
5512 #endif
5513 	"\t\t      dump\n"
5514 	"\t\t      cpudump\n"
5515 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5516 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5517 	"\t     The first one will disable tracing every time do_fault is hit\n"
5518 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5519 	"\t       The first time do trap is hit and it disables tracing, the\n"
5520 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5521 	"\t       the counter will not decrement. It only decrements when the\n"
5522 	"\t       trigger did work\n"
5523 	"\t     To remove trigger without count:\n"
5524 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5525 	"\t     To remove trigger with a count:\n"
5526 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5527 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5528 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5529 	"\t    modules: Can select a group via module command :mod:\n"
5530 	"\t    Does not accept triggers\n"
5531 #endif /* CONFIG_DYNAMIC_FTRACE */
5532 #ifdef CONFIG_FUNCTION_TRACER
5533 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5534 	"\t\t    (function)\n"
5535 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5536 	"\t\t    (function)\n"
5537 #endif
5538 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5539 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5540 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5541 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5542 #endif
5543 #ifdef CONFIG_TRACER_SNAPSHOT
5544 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5545 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5546 	"\t\t\t  information\n"
5547 #endif
5548 #ifdef CONFIG_STACK_TRACER
5549 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5550 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5551 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5552 	"\t\t\t  new trace)\n"
5553 #ifdef CONFIG_DYNAMIC_FTRACE
5554 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5555 	"\t\t\t  traces\n"
5556 #endif
5557 #endif /* CONFIG_STACK_TRACER */
5558 #ifdef CONFIG_DYNAMIC_EVENTS
5559 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5560 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5561 #endif
5562 #ifdef CONFIG_KPROBE_EVENTS
5563 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5564 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5565 #endif
5566 #ifdef CONFIG_UPROBE_EVENTS
5567 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5568 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5569 #endif
5570 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571 	"\t  accepts: event-definitions (one definition per line)\n"
5572 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5573 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5574 #ifdef CONFIG_HIST_TRIGGERS
5575 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5576 #endif
5577 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5578 	"\t           -:[<group>/]<event>\n"
5579 #ifdef CONFIG_KPROBE_EVENTS
5580 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5581   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5582 #endif
5583 #ifdef CONFIG_UPROBE_EVENTS
5584   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5585 #endif
5586 	"\t     args: <name>=fetcharg[:type]\n"
5587 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5588 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5589 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5590 #else
5591 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5592 #endif
5593 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5594 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5595 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5596 	"\t           <type>\\[<array-size>\\]\n"
5597 #ifdef CONFIG_HIST_TRIGGERS
5598 	"\t    field: <stype> <name>;\n"
5599 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5600 	"\t           [unsigned] char/int/long\n"
5601 #endif
5602 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5603 	"\t            of the <attached-group>/<attached-event>.\n"
5604 #endif
5605 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5606 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5607 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5608 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5609 	"\t\t\t  events\n"
5610 	"      filter\t\t- If set, only events passing filter are traced\n"
5611 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5612 	"\t\t\t  <event>:\n"
5613 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5614 	"      filter\t\t- If set, only events passing filter are traced\n"
5615 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5616 	"\t    Format: <trigger>[:count][if <filter>]\n"
5617 	"\t   trigger: traceon, traceoff\n"
5618 	"\t            enable_event:<system>:<event>\n"
5619 	"\t            disable_event:<system>:<event>\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621 	"\t            enable_hist:<system>:<event>\n"
5622 	"\t            disable_hist:<system>:<event>\n"
5623 #endif
5624 #ifdef CONFIG_STACKTRACE
5625 	"\t\t    stacktrace\n"
5626 #endif
5627 #ifdef CONFIG_TRACER_SNAPSHOT
5628 	"\t\t    snapshot\n"
5629 #endif
5630 #ifdef CONFIG_HIST_TRIGGERS
5631 	"\t\t    hist (see below)\n"
5632 #endif
5633 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5634 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5635 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5636 	"\t                  events/block/block_unplug/trigger\n"
5637 	"\t   The first disables tracing every time block_unplug is hit.\n"
5638 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5639 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5640 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5641 	"\t   Like function triggers, the counter is only decremented if it\n"
5642 	"\t    enabled or disabled tracing.\n"
5643 	"\t   To remove a trigger without a count:\n"
5644 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5645 	"\t   To remove a trigger with a count:\n"
5646 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5647 	"\t   Filters can be ignored when removing a trigger.\n"
5648 #ifdef CONFIG_HIST_TRIGGERS
5649 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5650 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5651 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5652 	"\t            [:values=<field1[,field2,...]>]\n"
5653 	"\t            [:sort=<field1[,field2,...]>]\n"
5654 	"\t            [:size=#entries]\n"
5655 	"\t            [:pause][:continue][:clear]\n"
5656 	"\t            [:name=histname1]\n"
5657 	"\t            [:<handler>.<action>]\n"
5658 	"\t            [if <filter>]\n\n"
5659 	"\t    Note, special fields can be used as well:\n"
5660 	"\t            common_timestamp - to record current timestamp\n"
5661 	"\t            common_cpu - to record the CPU the event happened on\n"
5662 	"\n"
5663 	"\t    A hist trigger variable can be:\n"
5664 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5665 	"\t        - a reference to another variable e.g. y=$x,\n"
5666 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5667 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5668 	"\n"
5669 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5670 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5671 	"\t    variable reference, field or numeric literal.\n"
5672 	"\n"
5673 	"\t    When a matching event is hit, an entry is added to a hash\n"
5674 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5675 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5676 	"\t    correspond to fields in the event's format description.  Keys\n"
5677 	"\t    can be any field, or the special string 'stacktrace'.\n"
5678 	"\t    Compound keys consisting of up to two fields can be specified\n"
5679 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5680 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5681 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5682 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5683 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5684 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5685 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5686 	"\t    its histogram data will be shared with other triggers of the\n"
5687 	"\t    same name, and trigger hits will update this common data.\n\n"
5688 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5689 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5690 	"\t    triggers attached to an event, there will be a table for each\n"
5691 	"\t    trigger in the output.  The table displayed for a named\n"
5692 	"\t    trigger will be the same as any other instance having the\n"
5693 	"\t    same name.  The default format used to display a given field\n"
5694 	"\t    can be modified by appending any of the following modifiers\n"
5695 	"\t    to the field name, as applicable:\n\n"
5696 	"\t            .hex        display a number as a hex value\n"
5697 	"\t            .sym        display an address as a symbol\n"
5698 	"\t            .sym-offset display an address as a symbol and offset\n"
5699 	"\t            .execname   display a common_pid as a program name\n"
5700 	"\t            .syscall    display a syscall id as a syscall name\n"
5701 	"\t            .log2       display log2 value rather than raw number\n"
5702 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5703 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5704 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5705 	"\t    trigger or to start a hist trigger but not log any events\n"
5706 	"\t    until told to do so.  'continue' can be used to start or\n"
5707 	"\t    restart a paused hist trigger.\n\n"
5708 	"\t    The 'clear' parameter will clear the contents of a running\n"
5709 	"\t    hist trigger and leave its current paused/active state\n"
5710 	"\t    unchanged.\n\n"
5711 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5712 	"\t    have one event conditionally start and stop another event's\n"
5713 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5714 	"\t    the enable_event and disable_event triggers.\n\n"
5715 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5716 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5717 	"\t        <handler>.<action>\n\n"
5718 	"\t    The available handlers are:\n\n"
5719 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5720 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5721 	"\t        onchange(var)            - invoke action if var changes\n\n"
5722 	"\t    The available actions are:\n\n"
5723 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5724 	"\t        save(field,...)                      - save current event fields\n"
5725 #ifdef CONFIG_TRACER_SNAPSHOT
5726 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5727 #endif
5728 #ifdef CONFIG_SYNTH_EVENTS
5729 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5730 	"\t  Write into this file to define/undefine new synthetic events.\n"
5731 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5732 #endif
5733 #endif
5734 ;
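
/*
 * Example (illustrative only; see the format strings above for the
 * authoritative syntax): a kprobe event on do_sys_open could be created
 * and enabled with something like:
 *
 *	# echo 'p:myprobe do_sys_open' >> kprobe_events
 *	# echo 1 > events/kprobes/myprobe/enable
 *
 * where the group defaults to "kprobes" when none is given.
 */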
5735 
5736 static ssize_t
5737 tracing_readme_read(struct file *filp, char __user *ubuf,
5738 		       size_t cnt, loff_t *ppos)
5739 {
5740 	return simple_read_from_buffer(ubuf, cnt, ppos,
5741 					readme_msg, strlen(readme_msg));
5742 }
5743 
5744 static const struct file_operations tracing_readme_fops = {
5745 	.open		= tracing_open_generic,
5746 	.read		= tracing_readme_read,
5747 	.llseek		= generic_file_llseek,
5748 };
5749 
5750 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5751 {
5752 	int pid = ++(*pos);
5753 
5754 	return trace_find_tgid_ptr(pid);
5755 }
5756 
5757 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5758 {
5759 	int pid = *pos;
5760 
5761 	return trace_find_tgid_ptr(pid);
5762 }
5763 
5764 static void saved_tgids_stop(struct seq_file *m, void *v)
5765 {
5766 }
5767 
5768 static int saved_tgids_show(struct seq_file *m, void *v)
5769 {
5770 	int *entry = (int *)v;
5771 	int pid = entry - tgid_map;
5772 	int tgid = *entry;
5773 
5774 	if (tgid == 0)
5775 		return SEQ_SKIP;
5776 
5777 	seq_printf(m, "%d %d\n", pid, tgid);
5778 	return 0;
5779 }
5780 
5781 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5782 	.start		= saved_tgids_start,
5783 	.stop		= saved_tgids_stop,
5784 	.next		= saved_tgids_next,
5785 	.show		= saved_tgids_show,
5786 };
5787 
5788 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5789 {
5790 	int ret;
5791 
5792 	ret = tracing_check_open_get_tr(NULL);
5793 	if (ret)
5794 		return ret;
5795 
5796 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5797 }
5798 
5799 
5800 static const struct file_operations tracing_saved_tgids_fops = {
5801 	.open		= tracing_saved_tgids_open,
5802 	.read		= seq_read,
5803 	.llseek		= seq_lseek,
5804 	.release	= seq_release,
5805 };
5806 
5807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5808 {
5809 	unsigned int *ptr = v;
5810 
5811 	if (*pos || m->count)
5812 		ptr++;
5813 
5814 	(*pos)++;
5815 
5816 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5817 	     ptr++) {
5818 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5819 			continue;
5820 
5821 		return ptr;
5822 	}
5823 
5824 	return NULL;
5825 }
5826 
5827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5828 {
5829 	void *v;
5830 	loff_t l = 0;
5831 
5832 	preempt_disable();
5833 	arch_spin_lock(&trace_cmdline_lock);
5834 
5835 	v = &savedcmd->map_cmdline_to_pid[0];
5836 	while (l <= *pos) {
5837 		v = saved_cmdlines_next(m, v, &l);
5838 		if (!v)
5839 			return NULL;
5840 	}
5841 
5842 	return v;
5843 }
5844 
5845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5846 {
5847 	arch_spin_unlock(&trace_cmdline_lock);
5848 	preempt_enable();
5849 }
5850 
5851 static int saved_cmdlines_show(struct seq_file *m, void *v)
5852 {
5853 	char buf[TASK_COMM_LEN];
5854 	unsigned int *pid = v;
5855 
5856 	__trace_find_cmdline(*pid, buf);
5857 	seq_printf(m, "%d %s\n", *pid, buf);
5858 	return 0;
5859 }
5860 
5861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5862 	.start		= saved_cmdlines_start,
5863 	.next		= saved_cmdlines_next,
5864 	.stop		= saved_cmdlines_stop,
5865 	.show		= saved_cmdlines_show,
5866 };
5867 
5868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5869 {
5870 	int ret;
5871 
5872 	ret = tracing_check_open_get_tr(NULL);
5873 	if (ret)
5874 		return ret;
5875 
5876 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5877 }
5878 
5879 static const struct file_operations tracing_saved_cmdlines_fops = {
5880 	.open		= tracing_saved_cmdlines_open,
5881 	.read		= seq_read,
5882 	.llseek		= seq_lseek,
5883 	.release	= seq_release,
5884 };
5885 
5886 static ssize_t
5887 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5888 				 size_t cnt, loff_t *ppos)
5889 {
5890 	char buf[64];
5891 	int r;
5892 
5893 	arch_spin_lock(&trace_cmdline_lock);
5894 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5895 	arch_spin_unlock(&trace_cmdline_lock);
5896 
5897 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5898 }
5899 
5900 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5901 {
5902 	kfree(s->saved_cmdlines);
5903 	kfree(s->map_cmdline_to_pid);
5904 	kfree(s);
5905 }
5906 
5907 static int tracing_resize_saved_cmdlines(unsigned int val)
5908 {
5909 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5910 
5911 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5912 	if (!s)
5913 		return -ENOMEM;
5914 
5915 	if (allocate_cmdlines_buffer(val, s) < 0) {
5916 		kfree(s);
5917 		return -ENOMEM;
5918 	}
5919 
5920 	arch_spin_lock(&trace_cmdline_lock);
5921 	savedcmd_temp = savedcmd;
5922 	savedcmd = s;
5923 	arch_spin_unlock(&trace_cmdline_lock);
5924 	free_saved_cmdlines_buffer(savedcmd_temp);
5925 
5926 	return 0;
5927 }
5928 
5929 static ssize_t
5930 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5931 				  size_t cnt, loff_t *ppos)
5932 {
5933 	unsigned long val;
5934 	int ret;
5935 
5936 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5937 	if (ret)
5938 		return ret;
5939 
5940 	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
5941 	if (!val || val > PID_MAX_DEFAULT)
5942 		return -EINVAL;
5943 
5944 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5945 	if (ret < 0)
5946 		return ret;
5947 
5948 	*ppos += cnt;
5949 
5950 	return cnt;
5951 }
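
/*
 * Example (illustrative): the readme above documents saved_cmdlines_size;
 * writing a new entry count reallocates the saved comm/pid map, e.g.:
 *
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * which ends up in tracing_resize_saved_cmdlines() above.
 */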
5952 
5953 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5954 	.open		= tracing_open_generic,
5955 	.read		= tracing_saved_cmdlines_size_read,
5956 	.write		= tracing_saved_cmdlines_size_write,
5957 };
5958 
5959 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5960 static union trace_eval_map_item *
5961 update_eval_map(union trace_eval_map_item *ptr)
5962 {
5963 	if (!ptr->map.eval_string) {
5964 		if (ptr->tail.next) {
5965 			ptr = ptr->tail.next;
5966 			/* Set ptr to the next real item (skip head) */
5967 			ptr++;
5968 		} else
5969 			return NULL;
5970 	}
5971 	return ptr;
5972 }
5973 
5974 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5975 {
5976 	union trace_eval_map_item *ptr = v;
5977 
5978 	/*
5979 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5980 	 * This really should never happen.
5981 	 */
5982 	(*pos)++;
5983 	ptr = update_eval_map(ptr);
5984 	if (WARN_ON_ONCE(!ptr))
5985 		return NULL;
5986 
5987 	ptr++;
5988 	ptr = update_eval_map(ptr);
5989 
5990 	return ptr;
5991 }
5992 
5993 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5994 {
5995 	union trace_eval_map_item *v;
5996 	loff_t l = 0;
5997 
5998 	mutex_lock(&trace_eval_mutex);
5999 
6000 	v = trace_eval_maps;
6001 	if (v)
6002 		v++;
6003 
6004 	while (v && l < *pos) {
6005 		v = eval_map_next(m, v, &l);
6006 	}
6007 
6008 	return v;
6009 }
6010 
6011 static void eval_map_stop(struct seq_file *m, void *v)
6012 {
6013 	mutex_unlock(&trace_eval_mutex);
6014 }
6015 
6016 static int eval_map_show(struct seq_file *m, void *v)
6017 {
6018 	union trace_eval_map_item *ptr = v;
6019 
6020 	seq_printf(m, "%s %ld (%s)\n",
6021 		   ptr->map.eval_string, ptr->map.eval_value,
6022 		   ptr->map.system);
6023 
6024 	return 0;
6025 }
6026 
6027 static const struct seq_operations tracing_eval_map_seq_ops = {
6028 	.start		= eval_map_start,
6029 	.next		= eval_map_next,
6030 	.stop		= eval_map_stop,
6031 	.show		= eval_map_show,
6032 };
6033 
6034 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6035 {
6036 	int ret;
6037 
6038 	ret = tracing_check_open_get_tr(NULL);
6039 	if (ret)
6040 		return ret;
6041 
6042 	return seq_open(filp, &tracing_eval_map_seq_ops);
6043 }
6044 
6045 static const struct file_operations tracing_eval_map_fops = {
6046 	.open		= tracing_eval_map_open,
6047 	.read		= seq_read,
6048 	.llseek		= seq_lseek,
6049 	.release	= seq_release,
6050 };
6051 
6052 static inline union trace_eval_map_item *
6053 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6054 {
6055 	/* Return tail of array given the head */
6056 	return ptr + ptr->head.length + 1;
6057 }
6058 
6059 static void
6060 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6061 			   int len)
6062 {
6063 	struct trace_eval_map **stop;
6064 	struct trace_eval_map **map;
6065 	union trace_eval_map_item *map_array;
6066 	union trace_eval_map_item *ptr;
6067 
6068 	stop = start + len;
6069 
6070 	/*
6071 	 * The trace_eval_maps contains the map plus a head and tail item,
6072 	 * where the head holds the module and length of array, and the
6073 	 * tail holds a pointer to the next list.
6074 	 */
6075 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6076 	if (!map_array) {
6077 		pr_warn("Unable to allocate trace eval mapping\n");
6078 		return;
6079 	}
6080 
6081 	mutex_lock(&trace_eval_mutex);
6082 
6083 	if (!trace_eval_maps)
6084 		trace_eval_maps = map_array;
6085 	else {
6086 		ptr = trace_eval_maps;
6087 		for (;;) {
6088 			ptr = trace_eval_jmp_to_tail(ptr);
6089 			if (!ptr->tail.next)
6090 				break;
6091 			ptr = ptr->tail.next;
6092 
6093 		}
6094 		ptr->tail.next = map_array;
6095 	}
6096 	map_array->head.mod = mod;
6097 	map_array->head.length = len;
6098 	map_array++;
6099 
6100 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6101 		map_array->map = **map;
6102 		map_array++;
6103 	}
6104 	memset(map_array, 0, sizeof(*map_array));
6105 
6106 	mutex_unlock(&trace_eval_mutex);
6107 }
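
/*
 * Layout produced above (derived from the code, shown here for clarity):
 *
 *	map_array[0]          head  (mod, length = len)
 *	map_array[1..len]     the copied trace_eval_map entries
 *	map_array[len + 1]    tail  (zeroed; tail.next links the next array)
 *
 * trace_eval_jmp_to_tail() relies on this layout to step from a head
 * directly to its tail.
 */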
6108 
6109 static void trace_create_eval_file(struct dentry *d_tracer)
6110 {
6111 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6112 			  NULL, &tracing_eval_map_fops);
6113 }
6114 
6115 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6116 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6117 static inline void trace_insert_eval_map_file(struct module *mod,
6118 			      struct trace_eval_map **start, int len) { }
6119 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6120 
6121 static void trace_insert_eval_map(struct module *mod,
6122 				  struct trace_eval_map **start, int len)
6123 {
6124 	struct trace_eval_map **map;
6125 
6126 	if (len <= 0)
6127 		return;
6128 
6129 	map = start;
6130 
6131 	trace_event_eval_update(map, len);
6132 
6133 	trace_insert_eval_map_file(mod, start, len);
6134 }
6135 
6136 static ssize_t
6137 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6138 		       size_t cnt, loff_t *ppos)
6139 {
6140 	struct trace_array *tr = filp->private_data;
6141 	char buf[MAX_TRACER_SIZE+2];
6142 	int r;
6143 
6144 	mutex_lock(&trace_types_lock);
6145 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6146 	mutex_unlock(&trace_types_lock);
6147 
6148 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6149 }
6150 
6151 int tracer_init(struct tracer *t, struct trace_array *tr)
6152 {
6153 	tracing_reset_online_cpus(&tr->array_buffer);
6154 	return t->init(tr);
6155 }
6156 
6157 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6158 {
6159 	int cpu;
6160 
6161 	for_each_tracing_cpu(cpu)
6162 		per_cpu_ptr(buf->data, cpu)->entries = val;
6163 }
6164 
6165 #ifdef CONFIG_TRACER_MAX_TRACE
6166 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6167 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6168 					struct array_buffer *size_buf, int cpu_id)
6169 {
6170 	int cpu, ret = 0;
6171 
6172 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6173 		for_each_tracing_cpu(cpu) {
6174 			ret = ring_buffer_resize(trace_buf->buffer,
6175 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6176 			if (ret < 0)
6177 				break;
6178 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6179 				per_cpu_ptr(size_buf->data, cpu)->entries;
6180 		}
6181 	} else {
6182 		ret = ring_buffer_resize(trace_buf->buffer,
6183 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6184 		if (ret == 0)
6185 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6186 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6187 	}
6188 
6189 	return ret;
6190 }
6191 #endif /* CONFIG_TRACER_MAX_TRACE */
6192 
6193 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6194 					unsigned long size, int cpu)
6195 {
6196 	int ret;
6197 
6198 	/*
6199 	 * If kernel or user changes the size of the ring buffer
6200 	 * we use the size that was given, and we can forget about
6201 	 * expanding it later.
6202 	 */
6203 	ring_buffer_expanded = true;
6204 
6205 	/* May be called before buffers are initialized */
6206 	if (!tr->array_buffer.buffer)
6207 		return 0;
6208 
6209 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6210 	if (ret < 0)
6211 		return ret;
6212 
6213 #ifdef CONFIG_TRACER_MAX_TRACE
6214 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6215 	    !tr->current_trace->use_max_tr)
6216 		goto out;
6217 
6218 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6219 	if (ret < 0) {
6220 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6221 						     &tr->array_buffer, cpu);
6222 		if (r < 0) {
6223 			/*
6224 			 * AARGH! We are left with different
6225 			 * size max buffer!!!!
6226 			 * The max buffer is our "snapshot" buffer.
6227 			 * When a tracer needs a snapshot (one of the
6228 			 * latency tracers), it swaps the max buffer
6229 			 * with the saved snapshot. We succeeded in updating
6230 			 * the size of the main buffer, but failed to update
6231 			 * the size of the max buffer. But when we tried
6232 			 * to reset the main buffer to the original size, we
6233 			 * failed there too. This is very unlikely to
6234 			 * happen, but if it does, warn and kill all
6235 			 * tracing.
6236 			 */
6237 			WARN_ON(1);
6238 			tracing_disabled = 1;
6239 		}
6240 		return ret;
6241 	}
6242 
6243 	if (cpu == RING_BUFFER_ALL_CPUS)
6244 		set_buffer_entries(&tr->max_buffer, size);
6245 	else
6246 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6247 
6248  out:
6249 #endif /* CONFIG_TRACER_MAX_TRACE */
6250 
6251 	if (cpu == RING_BUFFER_ALL_CPUS)
6252 		set_buffer_entries(&tr->array_buffer, size);
6253 	else
6254 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6255 
6256 	return ret;
6257 }
6258 
6259 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6260 				  unsigned long size, int cpu_id)
6261 {
6262 	int ret;
6263 
6264 	mutex_lock(&trace_types_lock);
6265 
6266 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6267 		/* make sure, this cpu is enabled in the mask */
6268 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6269 			ret = -EINVAL;
6270 			goto out;
6271 		}
6272 	}
6273 
6274 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6275 	if (ret < 0)
6276 		ret = -ENOMEM;
6277 
6278 out:
6279 	mutex_unlock(&trace_types_lock);
6280 
6281 	return ret;
6282 }
6283 
6284 
6285 /**
6286  * tracing_update_buffers - used by tracing facility to expand ring buffers
6287  *
6288  * To save memory on systems where tracing is configured in but never
6289  * used, the ring buffers start at a minimum size. Once a user starts
6290  * to use the tracing facility, they need to grow to their default
6291  * size.
6292  *
6293  * This function is to be called when a tracer is about to be used.
6294  */
6295 int tracing_update_buffers(void)
6296 {
6297 	int ret = 0;
6298 
6299 	mutex_lock(&trace_types_lock);
6300 	if (!ring_buffer_expanded)
6301 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6302 						RING_BUFFER_ALL_CPUS);
6303 	mutex_unlock(&trace_types_lock);
6304 
6305 	return ret;
6306 }
6307 
6308 struct trace_option_dentry;
6309 
6310 static void
6311 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6312 
6313 /*
6314  * Used to clear out the tracer before deletion of an instance.
6315  * Must have trace_types_lock held.
6316  */
6317 static void tracing_set_nop(struct trace_array *tr)
6318 {
6319 	if (tr->current_trace == &nop_trace)
6320 		return;
6321 
6322 	tr->current_trace->enabled--;
6323 
6324 	if (tr->current_trace->reset)
6325 		tr->current_trace->reset(tr);
6326 
6327 	tr->current_trace = &nop_trace;
6328 }
6329 
6330 static bool tracer_options_updated;
6331 
6332 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6333 {
6334 	/* Only enable if the directory has been created already. */
6335 	if (!tr->dir)
6336 		return;
6337 
6338 	/* Only create trace option files after update_tracer_options finish */
6339 	if (!tracer_options_updated)
6340 		return;
6341 
6342 	create_trace_option_files(tr, t);
6343 }
6344 
6345 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6346 {
6347 	struct tracer *t;
6348 #ifdef CONFIG_TRACER_MAX_TRACE
6349 	bool had_max_tr;
6350 #endif
6351 	int ret = 0;
6352 
6353 	mutex_lock(&trace_types_lock);
6354 
6355 	if (!ring_buffer_expanded) {
6356 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6357 						RING_BUFFER_ALL_CPUS);
6358 		if (ret < 0)
6359 			goto out;
6360 		ret = 0;
6361 	}
6362 
6363 	for (t = trace_types; t; t = t->next) {
6364 		if (strcmp(t->name, buf) == 0)
6365 			break;
6366 	}
6367 	if (!t) {
6368 		ret = -EINVAL;
6369 		goto out;
6370 	}
6371 	if (t == tr->current_trace)
6372 		goto out;
6373 
6374 #ifdef CONFIG_TRACER_SNAPSHOT
6375 	if (t->use_max_tr) {
6376 		arch_spin_lock(&tr->max_lock);
6377 		if (tr->cond_snapshot)
6378 			ret = -EBUSY;
6379 		arch_spin_unlock(&tr->max_lock);
6380 		if (ret)
6381 			goto out;
6382 	}
6383 #endif
6384 	/* Some tracers won't work on kernel command line */
6385 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6386 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6387 			t->name);
6388 		goto out;
6389 	}
6390 
6391 	/* Some tracers are only allowed for the top level buffer */
6392 	if (!trace_ok_for_array(t, tr)) {
6393 		ret = -EINVAL;
6394 		goto out;
6395 	}
6396 
6397 	/* If trace pipe files are being read, we can't change the tracer */
6398 	if (tr->trace_ref) {
6399 		ret = -EBUSY;
6400 		goto out;
6401 	}
6402 
6403 	trace_branch_disable();
6404 
6405 	tr->current_trace->enabled--;
6406 
6407 	if (tr->current_trace->reset)
6408 		tr->current_trace->reset(tr);
6409 
6410 	/* Current trace needs to be nop_trace before synchronize_rcu */
6411 	tr->current_trace = &nop_trace;
6412 
6413 #ifdef CONFIG_TRACER_MAX_TRACE
6414 	had_max_tr = tr->allocated_snapshot;
6415 
6416 	if (had_max_tr && !t->use_max_tr) {
6417 		/*
6418 		 * We need to make sure that the update_max_tr sees that
6419 		 * current_trace changed to nop_trace to keep it from
6420 		 * swapping the buffers after we resize it.
6421 		 * update_max_tr() is called with interrupts disabled,
6422 		 * so a synchronize_rcu() is sufficient.
6423 		 */
6424 		synchronize_rcu();
6425 		free_snapshot(tr);
6426 	}
6427 
6428 	if (t->use_max_tr && !had_max_tr) {
6429 		ret = tracing_alloc_snapshot_instance(tr);
6430 		if (ret < 0)
6431 			goto out;
6432 	}
6433 #endif
6434 
6435 	if (t->init) {
6436 		ret = tracer_init(t, tr);
6437 		if (ret)
6438 			goto out;
6439 	}
6440 
6441 	tr->current_trace = t;
6442 	tr->current_trace->enabled++;
6443 	trace_branch_enable(tr);
6444  out:
6445 	mutex_unlock(&trace_types_lock);
6446 
6447 	return ret;
6448 }
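
/*
 * Example (illustrative): this is the handler behind current_tracer, so
 * switching tracers from user space looks roughly like:
 *
 *	# cat available_tracers
 *	# echo function > current_tracer
 *	# echo nop > current_tracer
 *
 * The write path below trims the name and hands it to tracing_set_tracer().
 */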
6449 
6450 static ssize_t
6451 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6452 			size_t cnt, loff_t *ppos)
6453 {
6454 	struct trace_array *tr = filp->private_data;
6455 	char buf[MAX_TRACER_SIZE+1];
6456 	char *name;
6457 	size_t ret;
6458 	int err;
6459 
6460 	ret = cnt;
6461 
6462 	if (cnt > MAX_TRACER_SIZE)
6463 		cnt = MAX_TRACER_SIZE;
6464 
6465 	if (copy_from_user(buf, ubuf, cnt))
6466 		return -EFAULT;
6467 
6468 	buf[cnt] = 0;
6469 
6470 	name = strim(buf);
6471 
6472 	err = tracing_set_tracer(tr, name);
6473 	if (err)
6474 		return err;
6475 
6476 	*ppos += ret;
6477 
6478 	return ret;
6479 }
6480 
6481 static ssize_t
6482 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6483 		   size_t cnt, loff_t *ppos)
6484 {
6485 	char buf[64];
6486 	int r;
6487 
6488 	r = snprintf(buf, sizeof(buf), "%ld\n",
6489 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6490 	if (r > sizeof(buf))
6491 		r = sizeof(buf);
6492 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6493 }
6494 
6495 static ssize_t
6496 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6497 		    size_t cnt, loff_t *ppos)
6498 {
6499 	unsigned long val;
6500 	int ret;
6501 
6502 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6503 	if (ret)
6504 		return ret;
6505 
6506 	*ptr = val * 1000;
6507 
6508 	return cnt;
6509 }
6510 
6511 static ssize_t
6512 tracing_thresh_read(struct file *filp, char __user *ubuf,
6513 		    size_t cnt, loff_t *ppos)
6514 {
6515 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6516 }
6517 
6518 static ssize_t
6519 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6520 		     size_t cnt, loff_t *ppos)
6521 {
6522 	struct trace_array *tr = filp->private_data;
6523 	int ret;
6524 
6525 	mutex_lock(&trace_types_lock);
6526 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6527 	if (ret < 0)
6528 		goto out;
6529 
6530 	if (tr->current_trace->update_thresh) {
6531 		ret = tr->current_trace->update_thresh(tr);
6532 		if (ret < 0)
6533 			goto out;
6534 	}
6535 
6536 	ret = cnt;
6537 out:
6538 	mutex_unlock(&trace_types_lock);
6539 
6540 	return ret;
6541 }
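
/*
 * Example (illustrative): tracing_thresh is exposed in microseconds and
 * stored in nanoseconds (see tracing_nsecs_write() above), so
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * stores 100000 ns and asks the current tracer to update its threshold.
 */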
6542 
6543 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6544 
6545 static ssize_t
6546 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6547 		     size_t cnt, loff_t *ppos)
6548 {
6549 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6550 }
6551 
6552 static ssize_t
6553 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6554 		      size_t cnt, loff_t *ppos)
6555 {
6556 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6557 }
6558 
6559 #endif
6560 
6561 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6562 {
6563 	struct trace_array *tr = inode->i_private;
6564 	struct trace_iterator *iter;
6565 	int ret;
6566 
6567 	ret = tracing_check_open_get_tr(tr);
6568 	if (ret)
6569 		return ret;
6570 
6571 	mutex_lock(&trace_types_lock);
6572 
6573 	/* create a buffer to store the information to pass to userspace */
6574 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6575 	if (!iter) {
6576 		ret = -ENOMEM;
6577 		__trace_array_put(tr);
6578 		goto out;
6579 	}
6580 
6581 	trace_seq_init(&iter->seq);
6582 	iter->trace = tr->current_trace;
6583 
6584 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6585 		ret = -ENOMEM;
6586 		goto fail;
6587 	}
6588 
6589 	/* trace pipe does not show start of buffer */
6590 	cpumask_setall(iter->started);
6591 
6592 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6593 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6594 
6595 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6596 	if (trace_clocks[tr->clock_id].in_ns)
6597 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6598 
6599 	iter->tr = tr;
6600 	iter->array_buffer = &tr->array_buffer;
6601 	iter->cpu_file = tracing_get_cpu(inode);
6602 	mutex_init(&iter->mutex);
6603 	filp->private_data = iter;
6604 
6605 	if (iter->trace->pipe_open)
6606 		iter->trace->pipe_open(iter);
6607 
6608 	nonseekable_open(inode, filp);
6609 
6610 	tr->trace_ref++;
6611 out:
6612 	mutex_unlock(&trace_types_lock);
6613 	return ret;
6614 
6615 fail:
6616 	kfree(iter);
6617 	__trace_array_put(tr);
6618 	mutex_unlock(&trace_types_lock);
6619 	return ret;
6620 }
6621 
6622 static int tracing_release_pipe(struct inode *inode, struct file *file)
6623 {
6624 	struct trace_iterator *iter = file->private_data;
6625 	struct trace_array *tr = inode->i_private;
6626 
6627 	mutex_lock(&trace_types_lock);
6628 
6629 	tr->trace_ref--;
6630 
6631 	if (iter->trace->pipe_close)
6632 		iter->trace->pipe_close(iter);
6633 
6634 	mutex_unlock(&trace_types_lock);
6635 
6636 	free_cpumask_var(iter->started);
6637 	mutex_destroy(&iter->mutex);
6638 	kfree(iter);
6639 
6640 	trace_array_put(tr);
6641 
6642 	return 0;
6643 }
6644 
6645 static __poll_t
6646 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6647 {
6648 	struct trace_array *tr = iter->tr;
6649 
6650 	/* Iterators are static, they should be filled or empty */
6651 	if (trace_buffer_iter(iter, iter->cpu_file))
6652 		return EPOLLIN | EPOLLRDNORM;
6653 
6654 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6655 		/*
6656 		 * Always select as readable when in blocking mode
6657 		 */
6658 		return EPOLLIN | EPOLLRDNORM;
6659 	else
6660 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6661 					     filp, poll_table);
6662 }
6663 
6664 static __poll_t
6665 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6666 {
6667 	struct trace_iterator *iter = filp->private_data;
6668 
6669 	return trace_poll(iter, filp, poll_table);
6670 }
6671 
6672 /* Must be called with iter->mutex held. */
6673 static int tracing_wait_pipe(struct file *filp)
6674 {
6675 	struct trace_iterator *iter = filp->private_data;
6676 	int ret;
6677 
6678 	while (trace_empty(iter)) {
6679 
6680 		if ((filp->f_flags & O_NONBLOCK)) {
6681 			return -EAGAIN;
6682 		}
6683 
6684 		/*
6685 		 * We block until we read something and tracing is disabled.
6686 		 * We still block if tracing is disabled, but we have never
6687 		 * read anything. This allows a user to cat this file, and
6688 		 * then enable tracing. But after we have read something,
6689 		 * we give an EOF when tracing is again disabled.
6690 		 *
6691 		 * iter->pos will be 0 if we haven't read anything.
6692 		 */
6693 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6694 			break;
6695 
6696 		mutex_unlock(&iter->mutex);
6697 
6698 		ret = wait_on_pipe(iter, 0);
6699 
6700 		mutex_lock(&iter->mutex);
6701 
6702 		if (ret)
6703 			return ret;
6704 	}
6705 
6706 	return 1;
6707 }
6708 
6709 /*
6710  * Consumer reader.
6711  */
6712 static ssize_t
6713 tracing_read_pipe(struct file *filp, char __user *ubuf,
6714 		  size_t cnt, loff_t *ppos)
6715 {
6716 	struct trace_iterator *iter = filp->private_data;
6717 	ssize_t sret;
6718 
6719 	/*
6720 	 * Avoid more than one consumer on a single file descriptor
6721 	 * This is just a matter of traces coherency, the ring buffer itself
6722 	 * is protected.
6723 	 */
6724 	mutex_lock(&iter->mutex);
6725 
6726 	/* return any leftover data */
6727 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6728 	if (sret != -EBUSY)
6729 		goto out;
6730 
6731 	trace_seq_init(&iter->seq);
6732 
6733 	if (iter->trace->read) {
6734 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6735 		if (sret)
6736 			goto out;
6737 	}
6738 
6739 waitagain:
6740 	sret = tracing_wait_pipe(filp);
6741 	if (sret <= 0)
6742 		goto out;
6743 
6744 	/* stop when tracing is finished */
6745 	if (trace_empty(iter)) {
6746 		sret = 0;
6747 		goto out;
6748 	}
6749 
6750 	if (cnt >= PAGE_SIZE)
6751 		cnt = PAGE_SIZE - 1;
6752 
6753 	/* reset all but tr, trace, and overruns */
6754 	trace_iterator_reset(iter);
6755 	cpumask_clear(iter->started);
6756 	trace_seq_init(&iter->seq);
6757 
6758 	trace_event_read_lock();
6759 	trace_access_lock(iter->cpu_file);
6760 	while (trace_find_next_entry_inc(iter) != NULL) {
6761 		enum print_line_t ret;
6762 		int save_len = iter->seq.seq.len;
6763 
6764 		ret = print_trace_line(iter);
6765 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6766 			/* don't print partial lines */
6767 			iter->seq.seq.len = save_len;
6768 			break;
6769 		}
6770 		if (ret != TRACE_TYPE_NO_CONSUME)
6771 			trace_consume(iter);
6772 
6773 		if (trace_seq_used(&iter->seq) >= cnt)
6774 			break;
6775 
6776 		/*
6777 		 * Setting the full flag means we reached the trace_seq buffer
6778 		 * size and we should leave by partial output condition above.
6779 		 * One of the trace_seq_* functions is not used properly.
6780 		 */
6781 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6782 			  iter->ent->type);
6783 	}
6784 	trace_access_unlock(iter->cpu_file);
6785 	trace_event_read_unlock();
6786 
6787 	/* Now copy what we have to the user */
6788 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6789 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6790 		trace_seq_init(&iter->seq);
6791 
6792 	/*
6793 	 * If there was nothing to send to user, in spite of consuming trace
6794 	 * entries, go back to wait for more entries.
6795 	 */
6796 	if (sret == -EBUSY)
6797 		goto waitagain;
6798 
6799 out:
6800 	mutex_unlock(&iter->mutex);
6801 
6802 	return sret;
6803 }
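
/*
 * Example (illustrative): this is the consuming read behind trace_pipe, so
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * blocks until entries arrive and removes them as they are printed, while
 * a file opened with O_NONBLOCK gets -EAGAIN on an empty buffer
 * (see tracing_wait_pipe() above).
 */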
6804 
6805 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6806 				     unsigned int idx)
6807 {
6808 	__free_page(spd->pages[idx]);
6809 }
6810 
6811 static size_t
6812 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6813 {
6814 	size_t count;
6815 	int save_len;
6816 	int ret;
6817 
6818 	/* Seq buffer is page-sized, exactly what we need. */
6819 	for (;;) {
6820 		save_len = iter->seq.seq.len;
6821 		ret = print_trace_line(iter);
6822 
6823 		if (trace_seq_has_overflowed(&iter->seq)) {
6824 			iter->seq.seq.len = save_len;
6825 			break;
6826 		}
6827 
6828 		/*
6829 		 * This should not be hit, because it should only
6830 		 * be set if the iter->seq overflowed. But check it
6831 		 * anyway to be safe.
6832 		 */
6833 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6834 			iter->seq.seq.len = save_len;
6835 			break;
6836 		}
6837 
6838 		count = trace_seq_used(&iter->seq) - save_len;
6839 		if (rem < count) {
6840 			rem = 0;
6841 			iter->seq.seq.len = save_len;
6842 			break;
6843 		}
6844 
6845 		if (ret != TRACE_TYPE_NO_CONSUME)
6846 			trace_consume(iter);
6847 		rem -= count;
6848 		if (!trace_find_next_entry_inc(iter))	{
6849 			rem = 0;
6850 			iter->ent = NULL;
6851 			break;
6852 		}
6853 	}
6854 
6855 	return rem;
6856 }
6857 
6858 static ssize_t tracing_splice_read_pipe(struct file *filp,
6859 					loff_t *ppos,
6860 					struct pipe_inode_info *pipe,
6861 					size_t len,
6862 					unsigned int flags)
6863 {
6864 	struct page *pages_def[PIPE_DEF_BUFFERS];
6865 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6866 	struct trace_iterator *iter = filp->private_data;
6867 	struct splice_pipe_desc spd = {
6868 		.pages		= pages_def,
6869 		.partial	= partial_def,
6870 		.nr_pages	= 0, /* This gets updated below. */
6871 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6872 		.ops		= &default_pipe_buf_ops,
6873 		.spd_release	= tracing_spd_release_pipe,
6874 	};
6875 	ssize_t ret;
6876 	size_t rem;
6877 	unsigned int i;
6878 
6879 	if (splice_grow_spd(pipe, &spd))
6880 		return -ENOMEM;
6881 
6882 	mutex_lock(&iter->mutex);
6883 
6884 	if (iter->trace->splice_read) {
6885 		ret = iter->trace->splice_read(iter, filp,
6886 					       ppos, pipe, len, flags);
6887 		if (ret)
6888 			goto out_err;
6889 	}
6890 
6891 	ret = tracing_wait_pipe(filp);
6892 	if (ret <= 0)
6893 		goto out_err;
6894 
6895 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6896 		ret = -EFAULT;
6897 		goto out_err;
6898 	}
6899 
6900 	trace_event_read_lock();
6901 	trace_access_lock(iter->cpu_file);
6902 
6903 	/* Fill as many pages as possible. */
6904 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6905 		spd.pages[i] = alloc_page(GFP_KERNEL);
6906 		if (!spd.pages[i])
6907 			break;
6908 
6909 		rem = tracing_fill_pipe_page(rem, iter);
6910 
6911 		/* Copy the data into the page, so we can start over. */
6912 		ret = trace_seq_to_buffer(&iter->seq,
6913 					  page_address(spd.pages[i]),
6914 					  trace_seq_used(&iter->seq));
6915 		if (ret < 0) {
6916 			__free_page(spd.pages[i]);
6917 			break;
6918 		}
6919 		spd.partial[i].offset = 0;
6920 		spd.partial[i].len = trace_seq_used(&iter->seq);
6921 
6922 		trace_seq_init(&iter->seq);
6923 	}
6924 
6925 	trace_access_unlock(iter->cpu_file);
6926 	trace_event_read_unlock();
6927 	mutex_unlock(&iter->mutex);
6928 
6929 	spd.nr_pages = i;
6930 
6931 	if (i)
6932 		ret = splice_to_pipe(pipe, &spd);
6933 	else
6934 		ret = 0;
6935 out:
6936 	splice_shrink_spd(&spd);
6937 	return ret;
6938 
6939 out_err:
6940 	mutex_unlock(&iter->mutex);
6941 	goto out;
6942 }
6943 
6944 static ssize_t
6945 tracing_entries_read(struct file *filp, char __user *ubuf,
6946 		     size_t cnt, loff_t *ppos)
6947 {
6948 	struct inode *inode = file_inode(filp);
6949 	struct trace_array *tr = inode->i_private;
6950 	int cpu = tracing_get_cpu(inode);
6951 	char buf[64];
6952 	int r = 0;
6953 	ssize_t ret;
6954 
6955 	mutex_lock(&trace_types_lock);
6956 
6957 	if (cpu == RING_BUFFER_ALL_CPUS) {
6958 		int cpu, buf_size_same;
6959 		unsigned long size;
6960 
6961 		size = 0;
6962 		buf_size_same = 1;
6963 		/* check if all cpu buffer sizes are the same */
6964 		for_each_tracing_cpu(cpu) {
6965 			/* fill in the size from first enabled cpu */
6966 			if (size == 0)
6967 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6968 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6969 				buf_size_same = 0;
6970 				break;
6971 			}
6972 		}
6973 
6974 		if (buf_size_same) {
6975 			if (!ring_buffer_expanded)
6976 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6977 					    size >> 10,
6978 					    trace_buf_size >> 10);
6979 			else
6980 				r = sprintf(buf, "%lu\n", size >> 10);
6981 		} else
6982 			r = sprintf(buf, "X\n");
6983 	} else
6984 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6985 
6986 	mutex_unlock(&trace_types_lock);
6987 
6988 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6989 	return ret;
6990 }
6991 
6992 static ssize_t
6993 tracing_entries_write(struct file *filp, const char __user *ubuf,
6994 		      size_t cnt, loff_t *ppos)
6995 {
6996 	struct inode *inode = file_inode(filp);
6997 	struct trace_array *tr = inode->i_private;
6998 	unsigned long val;
6999 	int ret;
7000 
7001 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002 	if (ret)
7003 		return ret;
7004 
7005 	/* must have at least 1 entry */
7006 	if (!val)
7007 		return -EINVAL;
7008 
7009 	/* value is in KB */
7010 	val <<= 10;
7011 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7012 	if (ret < 0)
7013 		return ret;
7014 
7015 	*ppos += cnt;
7016 
7017 	return cnt;
7018 }
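
/*
 * Usage sketch (hedged; the file name is the one wired to
 * tracing_entries_fops, typically buffer_size_kb): the written value is
 * interpreted in KiB, so for example
 *
 *	echo 4096 > buffer_size_kb
 *
 * resizes each selected per-CPU ring buffer to 4 MiB.
 */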
7019 
7020 static ssize_t
7021 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7022 				size_t cnt, loff_t *ppos)
7023 {
7024 	struct trace_array *tr = filp->private_data;
7025 	char buf[64];
7026 	int r, cpu;
7027 	unsigned long size = 0, expanded_size = 0;
7028 
7029 	mutex_lock(&trace_types_lock);
7030 	for_each_tracing_cpu(cpu) {
7031 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7032 		if (!ring_buffer_expanded)
7033 			expanded_size += trace_buf_size >> 10;
7034 	}
7035 	if (ring_buffer_expanded)
7036 		r = sprintf(buf, "%lu\n", size);
7037 	else
7038 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7039 	mutex_unlock(&trace_types_lock);
7040 
7041 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7042 }
7043 
7044 static ssize_t
7045 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7046 			  size_t cnt, loff_t *ppos)
7047 {
7048 	/*
7049 	 * There is no need to read what the user has written, this function
7050 	 * is just to make sure that there is no error when "echo" is used
7051 	 */
7052 
7053 	*ppos += cnt;
7054 
7055 	return cnt;
7056 }
7057 
7058 static int
7059 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7060 {
7061 	struct trace_array *tr = inode->i_private;
7062 
7063 	/* disable tracing? */
7064 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7065 		tracer_tracing_off(tr);
7066 	/* resize the ring buffer to 0 */
7067 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7068 
7069 	trace_array_put(tr);
7070 
7071 	return 0;
7072 }
7073 
7074 static ssize_t
7075 tracing_mark_write(struct file *filp, const char __user *ubuf,
7076 					size_t cnt, loff_t *fpos)
7077 {
7078 	struct trace_array *tr = filp->private_data;
7079 	struct ring_buffer_event *event;
7080 	enum event_trigger_type tt = ETT_NONE;
7081 	struct trace_buffer *buffer;
7082 	struct print_entry *entry;
7083 	ssize_t written;
7084 	int size;
7085 	int len;
7086 
7087 /* Used in tracing_mark_raw_write() as well */
7088 #define FAULTED_STR "<faulted>"
7089 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7090 
7091 	if (tracing_disabled)
7092 		return -EINVAL;
7093 
7094 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7095 		return -EINVAL;
7096 
7097 	if (cnt > TRACE_BUF_SIZE)
7098 		cnt = TRACE_BUF_SIZE;
7099 
7100 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7101 
7102 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7103 
7104 	/* If less than "<faulted>", then make sure we can still add that */
7105 	if (cnt < FAULTED_SIZE)
7106 		size += FAULTED_SIZE - cnt;
7107 
7108 	buffer = tr->array_buffer.buffer;
7109 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7110 					    tracing_gen_ctx());
7111 	if (unlikely(!event))
7112 		/* Ring buffer disabled, return as if not open for write */
7113 		return -EBADF;
7114 
7115 	entry = ring_buffer_event_data(event);
7116 	entry->ip = _THIS_IP_;
7117 
7118 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7119 	if (len) {
7120 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7121 		cnt = FAULTED_SIZE;
7122 		written = -EFAULT;
7123 	} else
7124 		written = cnt;
7125 
7126 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7127 		/* do not add \n before testing triggers, but add \0 */
7128 		entry->buf[cnt] = '\0';
7129 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7130 	}
7131 
7132 	if (entry->buf[cnt - 1] != '\n') {
7133 		entry->buf[cnt] = '\n';
7134 		entry->buf[cnt + 1] = '\0';
7135 	} else
7136 		entry->buf[cnt] = '\0';
7137 
7138 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7139 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7140 	__buffer_unlock_commit(buffer, event);
7141 
7142 	if (tt)
7143 		event_triggers_post_call(tr->trace_marker_file, tt);
7144 
7145 	return written;
7146 }
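
/*
 * Usage sketch (hedged; the file backed by tracing_mark_fops is typically
 * trace_marker): userspace writes a short string which is recorded as a
 * TRACE_PRINT event, e.g.
 *
 *	echo "hello world" > trace_marker
 *
 * A trailing newline is appended above if the writer did not supply one.
 */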
7147 
7148 /* Limit it for now to 3K (including tag) */
7149 #define RAW_DATA_MAX_SIZE (1024*3)
7150 
7151 static ssize_t
7152 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7153 					size_t cnt, loff_t *fpos)
7154 {
7155 	struct trace_array *tr = filp->private_data;
7156 	struct ring_buffer_event *event;
7157 	struct trace_buffer *buffer;
7158 	struct raw_data_entry *entry;
7159 	ssize_t written;
7160 	int size;
7161 	int len;
7162 
7163 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7164 
7165 	if (tracing_disabled)
7166 		return -EINVAL;
7167 
7168 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7169 		return -EINVAL;
7170 
7171 	/* The marker must at least have a tag id */
7172 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7173 		return -EINVAL;
7174 
7175 	if (cnt > TRACE_BUF_SIZE)
7176 		cnt = TRACE_BUF_SIZE;
7177 
7178 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7179 
7180 	size = sizeof(*entry) + cnt;
7181 	if (cnt < FAULT_SIZE_ID)
7182 		size += FAULT_SIZE_ID - cnt;
7183 
7184 	buffer = tr->array_buffer.buffer;
7185 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7186 					    tracing_gen_ctx());
7187 	if (!event)
7188 		/* Ring buffer disabled, return as if not open for write */
7189 		return -EBADF;
7190 
7191 	entry = ring_buffer_event_data(event);
7192 
7193 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7194 	if (len) {
7195 		entry->id = -1;
7196 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7197 		written = -EFAULT;
7198 	} else
7199 		written = cnt;
7200 
7201 	__buffer_unlock_commit(buffer, event);
7202 
7203 	return written;
7204 }
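
/*
 * Illustrative userspace sketch (names assumed, not part of this file):
 * a writer prepends a 4-byte tag id to its payload before writing it to
 * the raw marker file backed by tracing_mark_raw_fops (typically
 * trace_marker_raw):
 *
 *	struct { int id; char data[8]; } rec = { .id = 42, .data = "payload" };
 *	write(fd, &rec, sizeof(rec));
 */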
7205 
7206 static int tracing_clock_show(struct seq_file *m, void *v)
7207 {
7208 	struct trace_array *tr = m->private;
7209 	int i;
7210 
7211 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7212 		seq_printf(m,
7213 			"%s%s%s%s", i ? " " : "",
7214 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7215 			i == tr->clock_id ? "]" : "");
7216 	seq_putc(m, '\n');
7217 
7218 	return 0;
7219 }
7220 
7221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7222 {
7223 	int i;
7224 
7225 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7226 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7227 			break;
7228 	}
7229 	if (i == ARRAY_SIZE(trace_clocks))
7230 		return -EINVAL;
7231 
7232 	mutex_lock(&trace_types_lock);
7233 
7234 	tr->clock_id = i;
7235 
7236 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7237 
7238 	/*
7239 	 * New clock may not be consistent with the previous clock.
7240 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7241 	 */
7242 	tracing_reset_online_cpus(&tr->array_buffer);
7243 
7244 #ifdef CONFIG_TRACER_MAX_TRACE
7245 	if (tr->max_buffer.buffer)
7246 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7247 	tracing_reset_online_cpus(&tr->max_buffer);
7248 #endif
7249 
7250 	mutex_unlock(&trace_types_lock);
7251 
7252 	return 0;
7253 }
7254 
7255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7256 				   size_t cnt, loff_t *fpos)
7257 {
7258 	struct seq_file *m = filp->private_data;
7259 	struct trace_array *tr = m->private;
7260 	char buf[64];
7261 	const char *clockstr;
7262 	int ret;
7263 
7264 	if (cnt >= sizeof(buf))
7265 		return -EINVAL;
7266 
7267 	if (copy_from_user(buf, ubuf, cnt))
7268 		return -EFAULT;
7269 
7270 	buf[cnt] = 0;
7271 
7272 	clockstr = strstrip(buf);
7273 
7274 	ret = tracing_set_clock(tr, clockstr);
7275 	if (ret)
7276 		return ret;
7277 
7278 	*fpos += cnt;
7279 
7280 	return cnt;
7281 }
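
/*
 * Usage sketch (hedged): a clock is selected by writing one of the names
 * printed by tracing_clock_show() to the file backed by trace_clock_fops
 * (typically trace_clock), e.g.
 *
 *	echo global > trace_clock
 *
 * Names not present in trace_clocks[] are rejected with -EINVAL.
 */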
7282 
7283 static int tracing_clock_open(struct inode *inode, struct file *file)
7284 {
7285 	struct trace_array *tr = inode->i_private;
7286 	int ret;
7287 
7288 	ret = tracing_check_open_get_tr(tr);
7289 	if (ret)
7290 		return ret;
7291 
7292 	ret = single_open(file, tracing_clock_show, inode->i_private);
7293 	if (ret < 0)
7294 		trace_array_put(tr);
7295 
7296 	return ret;
7297 }
7298 
7299 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7300 {
7301 	struct trace_array *tr = m->private;
7302 
7303 	mutex_lock(&trace_types_lock);
7304 
7305 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7306 		seq_puts(m, "delta [absolute]\n");
7307 	else
7308 		seq_puts(m, "[delta] absolute\n");
7309 
7310 	mutex_unlock(&trace_types_lock);
7311 
7312 	return 0;
7313 }
7314 
7315 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7316 {
7317 	struct trace_array *tr = inode->i_private;
7318 	int ret;
7319 
7320 	ret = tracing_check_open_get_tr(tr);
7321 	if (ret)
7322 		return ret;
7323 
7324 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7325 	if (ret < 0)
7326 		trace_array_put(tr);
7327 
7328 	return ret;
7329 }
7330 
7331 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7332 {
7333 	if (rbe == this_cpu_read(trace_buffered_event))
7334 		return ring_buffer_time_stamp(buffer);
7335 
7336 	return ring_buffer_event_time_stamp(buffer, rbe);
7337 }
7338 
7339 /*
7340  * Set or disable using the per CPU trace_buffered_event when possible.
7341  */
7342 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7343 {
7344 	int ret = 0;
7345 
7346 	mutex_lock(&trace_types_lock);
7347 
7348 	if (set && tr->no_filter_buffering_ref++)
7349 		goto out;
7350 
7351 	if (!set) {
7352 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7353 			ret = -EINVAL;
7354 			goto out;
7355 		}
7356 
7357 		--tr->no_filter_buffering_ref;
7358 	}
7359  out:
7360 	mutex_unlock(&trace_types_lock);
7361 
7362 	return ret;
7363 }
7364 
7365 struct ftrace_buffer_info {
7366 	struct trace_iterator	iter;
7367 	void			*spare;
7368 	unsigned int		spare_cpu;
7369 	unsigned int		read;
7370 };
7371 
7372 #ifdef CONFIG_TRACER_SNAPSHOT
7373 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7374 {
7375 	struct trace_array *tr = inode->i_private;
7376 	struct trace_iterator *iter;
7377 	struct seq_file *m;
7378 	int ret;
7379 
7380 	ret = tracing_check_open_get_tr(tr);
7381 	if (ret)
7382 		return ret;
7383 
7384 	if (file->f_mode & FMODE_READ) {
7385 		iter = __tracing_open(inode, file, true);
7386 		if (IS_ERR(iter))
7387 			ret = PTR_ERR(iter);
7388 	} else {
7389 		/* Writes still need the seq_file to hold the private data */
7390 		ret = -ENOMEM;
7391 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7392 		if (!m)
7393 			goto out;
7394 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7395 		if (!iter) {
7396 			kfree(m);
7397 			goto out;
7398 		}
7399 		ret = 0;
7400 
7401 		iter->tr = tr;
7402 		iter->array_buffer = &tr->max_buffer;
7403 		iter->cpu_file = tracing_get_cpu(inode);
7404 		m->private = iter;
7405 		file->private_data = m;
7406 	}
7407 out:
7408 	if (ret < 0)
7409 		trace_array_put(tr);
7410 
7411 	return ret;
7412 }
7413 
7414 static ssize_t
7415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7416 		       loff_t *ppos)
7417 {
7418 	struct seq_file *m = filp->private_data;
7419 	struct trace_iterator *iter = m->private;
7420 	struct trace_array *tr = iter->tr;
7421 	unsigned long val;
7422 	int ret;
7423 
7424 	ret = tracing_update_buffers();
7425 	if (ret < 0)
7426 		return ret;
7427 
7428 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7429 	if (ret)
7430 		return ret;
7431 
7432 	mutex_lock(&trace_types_lock);
7433 
7434 	if (tr->current_trace->use_max_tr) {
7435 		ret = -EBUSY;
7436 		goto out;
7437 	}
7438 
7439 	arch_spin_lock(&tr->max_lock);
7440 	if (tr->cond_snapshot)
7441 		ret = -EBUSY;
7442 	arch_spin_unlock(&tr->max_lock);
7443 	if (ret)
7444 		goto out;
7445 
7446 	switch (val) {
7447 	case 0:
7448 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7449 			ret = -EINVAL;
7450 			break;
7451 		}
7452 		if (tr->allocated_snapshot)
7453 			free_snapshot(tr);
7454 		break;
7455 	case 1:
7456 /* Only allow per-cpu swap if the ring buffer supports it */
7457 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7458 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7459 			ret = -EINVAL;
7460 			break;
7461 		}
7462 #endif
7463 		if (tr->allocated_snapshot)
7464 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7465 					&tr->array_buffer, iter->cpu_file);
7466 		else
7467 			ret = tracing_alloc_snapshot_instance(tr);
7468 		if (ret < 0)
7469 			break;
7470 		local_irq_disable();
7471 		/* Now, we're going to swap */
7472 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7473 			update_max_tr(tr, current, smp_processor_id(), NULL);
7474 		else
7475 			update_max_tr_single(tr, current, iter->cpu_file);
7476 		local_irq_enable();
7477 		break;
7478 	default:
7479 		if (tr->allocated_snapshot) {
7480 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7481 				tracing_reset_online_cpus(&tr->max_buffer);
7482 			else
7483 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7484 		}
7485 		break;
7486 	}
7487 
7488 	if (ret >= 0) {
7489 		*ppos += cnt;
7490 		ret = cnt;
7491 	}
7492 out:
7493 	mutex_unlock(&trace_types_lock);
7494 	return ret;
7495 }
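
/*
 * Summary of the write interface implemented above: writing '1' allocates
 * the snapshot buffer if necessary and swaps it with the live buffer,
 * writing '0' frees the snapshot (only valid on the all-CPUs file), and
 * any other value clears the snapshot buffer contents without freeing it.
 */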
7496 
7497 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7498 {
7499 	struct seq_file *m = file->private_data;
7500 	int ret;
7501 
7502 	ret = tracing_release(inode, file);
7503 
7504 	if (file->f_mode & FMODE_READ)
7505 		return ret;
7506 
7507 	/* If write only, the seq_file is just a stub */
7508 	if (m)
7509 		kfree(m->private);
7510 	kfree(m);
7511 
7512 	return 0;
7513 }
7514 
7515 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7516 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7517 				    size_t count, loff_t *ppos);
7518 static int tracing_buffers_release(struct inode *inode, struct file *file);
7519 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7520 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7521 
7522 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7523 {
7524 	struct ftrace_buffer_info *info;
7525 	int ret;
7526 
7527 	/* The following checks for tracefs lockdown */
7528 	ret = tracing_buffers_open(inode, filp);
7529 	if (ret < 0)
7530 		return ret;
7531 
7532 	info = filp->private_data;
7533 
7534 	if (info->iter.trace->use_max_tr) {
7535 		tracing_buffers_release(inode, filp);
7536 		return -EBUSY;
7537 	}
7538 
7539 	info->iter.snapshot = true;
7540 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7541 
7542 	return ret;
7543 }
7544 
7545 #endif /* CONFIG_TRACER_SNAPSHOT */
7546 
7547 
7548 static const struct file_operations tracing_thresh_fops = {
7549 	.open		= tracing_open_generic,
7550 	.read		= tracing_thresh_read,
7551 	.write		= tracing_thresh_write,
7552 	.llseek		= generic_file_llseek,
7553 };
7554 
7555 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7556 static const struct file_operations tracing_max_lat_fops = {
7557 	.open		= tracing_open_generic,
7558 	.read		= tracing_max_lat_read,
7559 	.write		= tracing_max_lat_write,
7560 	.llseek		= generic_file_llseek,
7561 };
7562 #endif
7563 
7564 static const struct file_operations set_tracer_fops = {
7565 	.open		= tracing_open_generic,
7566 	.read		= tracing_set_trace_read,
7567 	.write		= tracing_set_trace_write,
7568 	.llseek		= generic_file_llseek,
7569 };
7570 
7571 static const struct file_operations tracing_pipe_fops = {
7572 	.open		= tracing_open_pipe,
7573 	.poll		= tracing_poll_pipe,
7574 	.read		= tracing_read_pipe,
7575 	.splice_read	= tracing_splice_read_pipe,
7576 	.release	= tracing_release_pipe,
7577 	.llseek		= no_llseek,
7578 };
7579 
7580 static const struct file_operations tracing_entries_fops = {
7581 	.open		= tracing_open_generic_tr,
7582 	.read		= tracing_entries_read,
7583 	.write		= tracing_entries_write,
7584 	.llseek		= generic_file_llseek,
7585 	.release	= tracing_release_generic_tr,
7586 };
7587 
7588 static const struct file_operations tracing_total_entries_fops = {
7589 	.open		= tracing_open_generic_tr,
7590 	.read		= tracing_total_entries_read,
7591 	.llseek		= generic_file_llseek,
7592 	.release	= tracing_release_generic_tr,
7593 };
7594 
7595 static const struct file_operations tracing_free_buffer_fops = {
7596 	.open		= tracing_open_generic_tr,
7597 	.write		= tracing_free_buffer_write,
7598 	.release	= tracing_free_buffer_release,
7599 };
7600 
7601 static const struct file_operations tracing_mark_fops = {
7602 	.open		= tracing_mark_open,
7603 	.write		= tracing_mark_write,
7604 	.release	= tracing_release_generic_tr,
7605 };
7606 
7607 static const struct file_operations tracing_mark_raw_fops = {
7608 	.open		= tracing_mark_open,
7609 	.write		= tracing_mark_raw_write,
7610 	.release	= tracing_release_generic_tr,
7611 };
7612 
7613 static const struct file_operations trace_clock_fops = {
7614 	.open		= tracing_clock_open,
7615 	.read		= seq_read,
7616 	.llseek		= seq_lseek,
7617 	.release	= tracing_single_release_tr,
7618 	.write		= tracing_clock_write,
7619 };
7620 
7621 static const struct file_operations trace_time_stamp_mode_fops = {
7622 	.open		= tracing_time_stamp_mode_open,
7623 	.read		= seq_read,
7624 	.llseek		= seq_lseek,
7625 	.release	= tracing_single_release_tr,
7626 };
7627 
7628 #ifdef CONFIG_TRACER_SNAPSHOT
7629 static const struct file_operations snapshot_fops = {
7630 	.open		= tracing_snapshot_open,
7631 	.read		= seq_read,
7632 	.write		= tracing_snapshot_write,
7633 	.llseek		= tracing_lseek,
7634 	.release	= tracing_snapshot_release,
7635 };
7636 
7637 static const struct file_operations snapshot_raw_fops = {
7638 	.open		= snapshot_raw_open,
7639 	.read		= tracing_buffers_read,
7640 	.release	= tracing_buffers_release,
7641 	.splice_read	= tracing_buffers_splice_read,
7642 	.llseek		= no_llseek,
7643 };
7644 
7645 #endif /* CONFIG_TRACER_SNAPSHOT */
7646 
7647 /*
7648  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7649  * @filp: The active open file structure
7650  * @ubuf: The userspace provided buffer containing the value to write
7651  * @cnt: The maximum number of bytes to read from @ubuf
7652  * @ppos: The current "file" position
7653  *
7654  * This function implements the write interface for a struct trace_min_max_param.
7655  * The filp->private_data must point to a trace_min_max_param structure that
7656  * defines where to write the value, the min and the max acceptable values,
7657  * and a lock to protect the write.
7658  */
7659 static ssize_t
7660 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7661 {
7662 	struct trace_min_max_param *param = filp->private_data;
7663 	u64 val;
7664 	int err;
7665 
7666 	if (!param)
7667 		return -EFAULT;
7668 
7669 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7670 	if (err)
7671 		return err;
7672 
7673 	if (param->lock)
7674 		mutex_lock(param->lock);
7675 
7676 	if (param->min && val < *param->min)
7677 		err = -EINVAL;
7678 
7679 	if (param->max && val > *param->max)
7680 		err = -EINVAL;
7681 
7682 	if (!err)
7683 		*param->val = val;
7684 
7685 	if (param->lock)
7686 		mutex_unlock(param->lock);
7687 
7688 	if (err)
7689 		return err;
7690 
7691 	return cnt;
7692 }
7693 
7694 /*
7695  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7696  * @filp: The active open file structure
7697  * @ubuf: The userspace provided buffer to read value into
7698  * @cnt: The maximum number of bytes to read
7699  * @ppos: The current "file" position
7700  *
7701  * This function implements the read interface for a struct trace_min_max_param.
7702  * The filp->private_data must point to a trace_min_max_param struct with valid
7703  * data.
7704  */
7705 static ssize_t
7706 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7707 {
7708 	struct trace_min_max_param *param = filp->private_data;
7709 	char buf[U64_STR_SIZE];
7710 	int len;
7711 	u64 val;
7712 
7713 	if (!param)
7714 		return -EFAULT;
7715 
7716 	val = *param->val;
7717 
7718 	if (cnt > sizeof(buf))
7719 		cnt = sizeof(buf);
7720 
7721 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7722 
7723 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7724 }
7725 
7726 const struct file_operations trace_min_max_fops = {
7727 	.open		= tracing_open_generic,
7728 	.read		= trace_min_max_read,
7729 	.write		= trace_min_max_write,
7730 };
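
/*
 * Illustrative wiring (hedged sketch; the my_* symbols are assumptions,
 * not existing code): a bounded u64 tunable could be exposed as
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_value", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */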
7731 
7732 #define TRACING_LOG_ERRS_MAX	8
7733 #define TRACING_LOG_LOC_MAX	128
7734 
7735 #define CMD_PREFIX "  Command: "
7736 
7737 struct err_info {
7738 	const char	**errs;	/* ptr to loc-specific array of err strings */
7739 	u8		type;	/* index into errs -> specific err string */
7740 	u16		pos;	/* caret position */
7741 	u64		ts;
7742 };
7743 
7744 struct tracing_log_err {
7745 	struct list_head	list;
7746 	struct err_info		info;
7747 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7748 	char			*cmd;                     /* what caused err */
7749 };
7750 
7751 static DEFINE_MUTEX(tracing_err_log_lock);
7752 
7753 static struct tracing_log_err *alloc_tracing_log_err(int len)
7754 {
7755 	struct tracing_log_err *err;
7756 
7757 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7758 	if (!err)
7759 		return ERR_PTR(-ENOMEM);
7760 
7761 	err->cmd = kzalloc(len, GFP_KERNEL);
7762 	if (!err->cmd) {
7763 		kfree(err);
7764 		return ERR_PTR(-ENOMEM);
7765 	}
7766 
7767 	return err;
7768 }
7769 
7770 static void free_tracing_log_err(struct tracing_log_err *err)
7771 {
7772 	kfree(err->cmd);
7773 	kfree(err);
7774 }
7775 
7776 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7777 						   int len)
7778 {
7779 	struct tracing_log_err *err;
7780 
7781 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7782 		err = alloc_tracing_log_err(len);
7783 		if (PTR_ERR(err) != -ENOMEM)
7784 			tr->n_err_log_entries++;
7785 
7786 		return err;
7787 	}
7788 
7789 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7790 	kfree(err->cmd);
7791 	err->cmd = kzalloc(len, GFP_KERNEL);
7792 	if (!err->cmd)
7793 		return ERR_PTR(-ENOMEM);
7794 	list_del(&err->list);
7795 
7796 	return err;
7797 }
7798 
7799 /**
7800  * err_pos - find the position of a string within a command for error careting
7801  * @cmd: The tracing command that caused the error
7802  * @str: The string to position the caret at within @cmd
7803  *
7804  * Finds the position of the first occurrence of @str within @cmd.  The
7805  * return value can be passed to tracing_log_err() for caret placement
7806  * within @cmd.
7807  *
7808  * Returns the index within @cmd of the first occurrence of @str or 0
7809  * if @str was not found.
7810  */
7811 unsigned int err_pos(char *cmd, const char *str)
7812 {
7813 	char *found;
7814 
7815 	if (WARN_ON(!strlen(cmd)))
7816 		return 0;
7817 
7818 	found = strstr(cmd, str);
7819 	if (found)
7820 		return found - cmd;
7821 
7822 	return 0;
7823 }
7824 
7825 /**
7826  * tracing_log_err - write an error to the tracing error log
7827  * @tr: The associated trace array for the error (NULL for top level array)
7828  * @loc: A string describing where the error occurred
7829  * @cmd: The tracing command that caused the error
7830  * @errs: The array of loc-specific static error strings
7831  * @type: The index into errs[], which produces the specific static err string
7832  * @pos: The position the caret should be placed in the cmd
7833  *
7834  * Writes an error into tracing/error_log of the form:
7835  *
7836  * <loc>: error: <text>
7837  *   Command: <cmd>
7838  *              ^
7839  *
7840  * tracing/error_log is a small log file containing the last
7841  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7842  * unless there has been a tracing error, and the error log can be
7843  * cleared and have its memory freed by writing the empty string in
7844  * truncation mode to it i.e. echo > tracing/error_log.
7845  *
7846  * NOTE: the @errs array along with the @type param are used to
7847  * produce a static error string - this string is not copied and saved
7848  * when the error is logged - only a pointer to it is saved.  See
7849  * existing callers for examples of how static strings are typically
7850  * defined for use with tracing_log_err().
7851  */
7852 void tracing_log_err(struct trace_array *tr,
7853 		     const char *loc, const char *cmd,
7854 		     const char **errs, u8 type, u16 pos)
7855 {
7856 	struct tracing_log_err *err;
7857 	int len = 0;
7858 
7859 	if (!tr)
7860 		tr = &global_trace;
7861 
7862 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7863 
7864 	mutex_lock(&tracing_err_log_lock);
7865 	err = get_tracing_log_err(tr, len);
7866 	if (PTR_ERR(err) == -ENOMEM) {
7867 		mutex_unlock(&tracing_err_log_lock);
7868 		return;
7869 	}
7870 
7871 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7872 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7873 
7874 	err->info.errs = errs;
7875 	err->info.type = type;
7876 	err->info.pos = pos;
7877 	err->info.ts = local_clock();
7878 
7879 	list_add_tail(&err->list, &tr->err_log);
7880 	mutex_unlock(&tracing_err_log_lock);
7881 }
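
/*
 * Hedged example of a caller (hypothetical, not taken from existing code):
 *
 *	static const char *my_cmd_errs[] = { "Unknown keyword", };
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs, 0,
 *			err_pos(cmd, bad_word));
 *
 * Only the pointer into my_cmd_errs[] is stored, so the error strings must
 * remain valid (i.e. be static) for the lifetime of the log entry.
 */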
7882 
7883 static void clear_tracing_err_log(struct trace_array *tr)
7884 {
7885 	struct tracing_log_err *err, *next;
7886 
7887 	mutex_lock(&tracing_err_log_lock);
7888 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7889 		list_del(&err->list);
7890 		free_tracing_log_err(err);
7891 	}
7892 
7893 	tr->n_err_log_entries = 0;
7894 	mutex_unlock(&tracing_err_log_lock);
7895 }
7896 
7897 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7898 {
7899 	struct trace_array *tr = m->private;
7900 
7901 	mutex_lock(&tracing_err_log_lock);
7902 
7903 	return seq_list_start(&tr->err_log, *pos);
7904 }
7905 
7906 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7907 {
7908 	struct trace_array *tr = m->private;
7909 
7910 	return seq_list_next(v, &tr->err_log, pos);
7911 }
7912 
7913 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7914 {
7915 	mutex_unlock(&tracing_err_log_lock);
7916 }
7917 
7918 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7919 {
7920 	u16 i;
7921 
7922 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7923 		seq_putc(m, ' ');
7924 	for (i = 0; i < pos; i++)
7925 		seq_putc(m, ' ');
7926 	seq_puts(m, "^\n");
7927 }
7928 
7929 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7930 {
7931 	struct tracing_log_err *err = v;
7932 
7933 	if (err) {
7934 		const char *err_text = err->info.errs[err->info.type];
7935 		u64 sec = err->info.ts;
7936 		u32 nsec;
7937 
7938 		nsec = do_div(sec, NSEC_PER_SEC);
7939 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7940 			   err->loc, err_text);
7941 		seq_printf(m, "%s", err->cmd);
7942 		tracing_err_log_show_pos(m, err->info.pos);
7943 	}
7944 
7945 	return 0;
7946 }
7947 
7948 static const struct seq_operations tracing_err_log_seq_ops = {
7949 	.start  = tracing_err_log_seq_start,
7950 	.next   = tracing_err_log_seq_next,
7951 	.stop   = tracing_err_log_seq_stop,
7952 	.show   = tracing_err_log_seq_show
7953 };
7954 
7955 static int tracing_err_log_open(struct inode *inode, struct file *file)
7956 {
7957 	struct trace_array *tr = inode->i_private;
7958 	int ret = 0;
7959 
7960 	ret = tracing_check_open_get_tr(tr);
7961 	if (ret)
7962 		return ret;
7963 
7964 	/* If this file was opened for write, then erase contents */
7965 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7966 		clear_tracing_err_log(tr);
7967 
7968 	if (file->f_mode & FMODE_READ) {
7969 		ret = seq_open(file, &tracing_err_log_seq_ops);
7970 		if (!ret) {
7971 			struct seq_file *m = file->private_data;
7972 			m->private = tr;
7973 		} else {
7974 			trace_array_put(tr);
7975 		}
7976 	}
7977 	return ret;
7978 }
7979 
7980 static ssize_t tracing_err_log_write(struct file *file,
7981 				     const char __user *buffer,
7982 				     size_t count, loff_t *ppos)
7983 {
7984 	return count;
7985 }
7986 
7987 static int tracing_err_log_release(struct inode *inode, struct file *file)
7988 {
7989 	struct trace_array *tr = inode->i_private;
7990 
7991 	trace_array_put(tr);
7992 
7993 	if (file->f_mode & FMODE_READ)
7994 		seq_release(inode, file);
7995 
7996 	return 0;
7997 }
7998 
7999 static const struct file_operations tracing_err_log_fops = {
8000 	.open           = tracing_err_log_open,
8001 	.write		= tracing_err_log_write,
8002 	.read           = seq_read,
8003 	.llseek         = seq_lseek,
8004 	.release        = tracing_err_log_release,
8005 };
8006 
8007 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8008 {
8009 	struct trace_array *tr = inode->i_private;
8010 	struct ftrace_buffer_info *info;
8011 	int ret;
8012 
8013 	ret = tracing_check_open_get_tr(tr);
8014 	if (ret)
8015 		return ret;
8016 
8017 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8018 	if (!info) {
8019 		trace_array_put(tr);
8020 		return -ENOMEM;
8021 	}
8022 
8023 	mutex_lock(&trace_types_lock);
8024 
8025 	info->iter.tr		= tr;
8026 	info->iter.cpu_file	= tracing_get_cpu(inode);
8027 	info->iter.trace	= tr->current_trace;
8028 	info->iter.array_buffer = &tr->array_buffer;
8029 	info->spare		= NULL;
8030 	/* Force reading ring buffer for first read */
8031 	info->read		= (unsigned int)-1;
8032 
8033 	filp->private_data = info;
8034 
8035 	tr->trace_ref++;
8036 
8037 	mutex_unlock(&trace_types_lock);
8038 
8039 	ret = nonseekable_open(inode, filp);
8040 	if (ret < 0)
8041 		trace_array_put(tr);
8042 
8043 	return ret;
8044 }
8045 
8046 static __poll_t
8047 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8048 {
8049 	struct ftrace_buffer_info *info = filp->private_data;
8050 	struct trace_iterator *iter = &info->iter;
8051 
8052 	return trace_poll(iter, filp, poll_table);
8053 }
8054 
8055 static ssize_t
8056 tracing_buffers_read(struct file *filp, char __user *ubuf,
8057 		     size_t count, loff_t *ppos)
8058 {
8059 	struct ftrace_buffer_info *info = filp->private_data;
8060 	struct trace_iterator *iter = &info->iter;
8061 	ssize_t ret = 0;
8062 	ssize_t size;
8063 
8064 	if (!count)
8065 		return 0;
8066 
8067 #ifdef CONFIG_TRACER_MAX_TRACE
8068 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8069 		return -EBUSY;
8070 #endif
8071 
8072 	if (!info->spare) {
8073 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8074 							  iter->cpu_file);
8075 		if (IS_ERR(info->spare)) {
8076 			ret = PTR_ERR(info->spare);
8077 			info->spare = NULL;
8078 		} else {
8079 			info->spare_cpu = iter->cpu_file;
8080 		}
8081 	}
8082 	if (!info->spare)
8083 		return ret;
8084 
8085 	/* Do we have previous read data to read? */
8086 	if (info->read < PAGE_SIZE)
8087 		goto read;
8088 
8089  again:
8090 	trace_access_lock(iter->cpu_file);
8091 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8092 				    &info->spare,
8093 				    count,
8094 				    iter->cpu_file, 0);
8095 	trace_access_unlock(iter->cpu_file);
8096 
8097 	if (ret < 0) {
8098 		if (trace_empty(iter)) {
8099 			if ((filp->f_flags & O_NONBLOCK))
8100 				return -EAGAIN;
8101 
8102 			ret = wait_on_pipe(iter, 0);
8103 			if (ret)
8104 				return ret;
8105 
8106 			goto again;
8107 		}
8108 		return 0;
8109 	}
8110 
8111 	info->read = 0;
8112  read:
8113 	size = PAGE_SIZE - info->read;
8114 	if (size > count)
8115 		size = count;
8116 
8117 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8118 	if (ret == size)
8119 		return -EFAULT;
8120 
8121 	size -= ret;
8122 
8123 	*ppos += size;
8124 	info->read += size;
8125 
8126 	return size;
8127 }
8128 
8129 static int tracing_buffers_release(struct inode *inode, struct file *file)
8130 {
8131 	struct ftrace_buffer_info *info = file->private_data;
8132 	struct trace_iterator *iter = &info->iter;
8133 
8134 	mutex_lock(&trace_types_lock);
8135 
8136 	iter->tr->trace_ref--;
8137 
8138 	__trace_array_put(iter->tr);
8139 
8140 	if (info->spare)
8141 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8142 					   info->spare_cpu, info->spare);
8143 	kvfree(info);
8144 
8145 	mutex_unlock(&trace_types_lock);
8146 
8147 	return 0;
8148 }
8149 
8150 struct buffer_ref {
8151 	struct trace_buffer	*buffer;
8152 	void			*page;
8153 	int			cpu;
8154 	refcount_t		refcount;
8155 };
8156 
8157 static void buffer_ref_release(struct buffer_ref *ref)
8158 {
8159 	if (!refcount_dec_and_test(&ref->refcount))
8160 		return;
8161 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8162 	kfree(ref);
8163 }
8164 
8165 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8166 				    struct pipe_buffer *buf)
8167 {
8168 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8169 
8170 	buffer_ref_release(ref);
8171 	buf->private = 0;
8172 }
8173 
8174 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8175 				struct pipe_buffer *buf)
8176 {
8177 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8178 
8179 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8180 		return false;
8181 
8182 	refcount_inc(&ref->refcount);
8183 	return true;
8184 }
8185 
8186 /* Pipe buffer operations for a buffer. */
8187 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8188 	.release		= buffer_pipe_buf_release,
8189 	.get			= buffer_pipe_buf_get,
8190 };
8191 
8192 /*
8193  * Callback from splice_to_pipe(), if we need to release some pages
8194  * at the end of the spd in case we error'ed out in filling the pipe.
8195  */
8196 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8197 {
8198 	struct buffer_ref *ref =
8199 		(struct buffer_ref *)spd->partial[i].private;
8200 
8201 	buffer_ref_release(ref);
8202 	spd->partial[i].private = 0;
8203 }
8204 
8205 static ssize_t
8206 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8207 			    struct pipe_inode_info *pipe, size_t len,
8208 			    unsigned int flags)
8209 {
8210 	struct ftrace_buffer_info *info = file->private_data;
8211 	struct trace_iterator *iter = &info->iter;
8212 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8213 	struct page *pages_def[PIPE_DEF_BUFFERS];
8214 	struct splice_pipe_desc spd = {
8215 		.pages		= pages_def,
8216 		.partial	= partial_def,
8217 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8218 		.ops		= &buffer_pipe_buf_ops,
8219 		.spd_release	= buffer_spd_release,
8220 	};
8221 	struct buffer_ref *ref;
8222 	int entries, i;
8223 	ssize_t ret = 0;
8224 
8225 #ifdef CONFIG_TRACER_MAX_TRACE
8226 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8227 		return -EBUSY;
8228 #endif
8229 
8230 	if (*ppos & (PAGE_SIZE - 1))
8231 		return -EINVAL;
8232 
8233 	if (len & (PAGE_SIZE - 1)) {
8234 		if (len < PAGE_SIZE)
8235 			return -EINVAL;
8236 		len &= PAGE_MASK;
8237 	}
8238 
8239 	if (splice_grow_spd(pipe, &spd))
8240 		return -ENOMEM;
8241 
8242  again:
8243 	trace_access_lock(iter->cpu_file);
8244 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8245 
8246 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8247 		struct page *page;
8248 		int r;
8249 
8250 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8251 		if (!ref) {
8252 			ret = -ENOMEM;
8253 			break;
8254 		}
8255 
8256 		refcount_set(&ref->refcount, 1);
8257 		ref->buffer = iter->array_buffer->buffer;
8258 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8259 		if (IS_ERR(ref->page)) {
8260 			ret = PTR_ERR(ref->page);
8261 			ref->page = NULL;
8262 			kfree(ref);
8263 			break;
8264 		}
8265 		ref->cpu = iter->cpu_file;
8266 
8267 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8268 					  len, iter->cpu_file, 1);
8269 		if (r < 0) {
8270 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8271 						   ref->page);
8272 			kfree(ref);
8273 			break;
8274 		}
8275 
8276 		page = virt_to_page(ref->page);
8277 
8278 		spd.pages[i] = page;
8279 		spd.partial[i].len = PAGE_SIZE;
8280 		spd.partial[i].offset = 0;
8281 		spd.partial[i].private = (unsigned long)ref;
8282 		spd.nr_pages++;
8283 		*ppos += PAGE_SIZE;
8284 
8285 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8286 	}
8287 
8288 	trace_access_unlock(iter->cpu_file);
8289 	spd.nr_pages = i;
8290 
8291 	/* did we read anything? */
8292 	if (!spd.nr_pages) {
8293 		if (ret)
8294 			goto out;
8295 
8296 		ret = -EAGAIN;
8297 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8298 			goto out;
8299 
8300 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8301 		if (ret)
8302 			goto out;
8303 
8304 		goto again;
8305 	}
8306 
8307 	ret = splice_to_pipe(pipe, &spd);
8308 out:
8309 	splice_shrink_spd(&spd);
8310 
8311 	return ret;
8312 }
8313 
8314 static const struct file_operations tracing_buffers_fops = {
8315 	.open		= tracing_buffers_open,
8316 	.read		= tracing_buffers_read,
8317 	.poll		= tracing_buffers_poll,
8318 	.release	= tracing_buffers_release,
8319 	.splice_read	= tracing_buffers_splice_read,
8320 	.llseek		= no_llseek,
8321 };
8322 
8323 static ssize_t
8324 tracing_stats_read(struct file *filp, char __user *ubuf,
8325 		   size_t count, loff_t *ppos)
8326 {
8327 	struct inode *inode = file_inode(filp);
8328 	struct trace_array *tr = inode->i_private;
8329 	struct array_buffer *trace_buf = &tr->array_buffer;
8330 	int cpu = tracing_get_cpu(inode);
8331 	struct trace_seq *s;
8332 	unsigned long cnt;
8333 	unsigned long long t;
8334 	unsigned long usec_rem;
8335 
8336 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8337 	if (!s)
8338 		return -ENOMEM;
8339 
8340 	trace_seq_init(s);
8341 
8342 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343 	trace_seq_printf(s, "entries: %ld\n", cnt);
8344 
8345 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8347 
8348 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350 
8351 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8353 
8354 	if (trace_clocks[tr->clock_id].in_ns) {
8355 		/* local or global for trace_clock */
8356 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357 		usec_rem = do_div(t, USEC_PER_SEC);
8358 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359 								t, usec_rem);
8360 
8361 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362 		usec_rem = do_div(t, USEC_PER_SEC);
8363 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364 	} else {
8365 		/* counter or tsc mode for trace_clock */
8366 		trace_seq_printf(s, "oldest event ts: %llu\n",
8367 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368 
8369 		trace_seq_printf(s, "now ts: %llu\n",
8370 				ring_buffer_time_stamp(trace_buf->buffer));
8371 	}
8372 
8373 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375 
8376 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377 	trace_seq_printf(s, "read events: %ld\n", cnt);
8378 
8379 	count = simple_read_from_buffer(ubuf, count, ppos,
8380 					s->buffer, trace_seq_used(s));
8381 
8382 	kfree(s);
8383 
8384 	return count;
8385 }
8386 
8387 static const struct file_operations tracing_stats_fops = {
8388 	.open		= tracing_open_generic_tr,
8389 	.read		= tracing_stats_read,
8390 	.llseek		= generic_file_llseek,
8391 	.release	= tracing_release_generic_tr,
8392 };
8393 
8394 #ifdef CONFIG_DYNAMIC_FTRACE
8395 
8396 static ssize_t
8397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398 		  size_t cnt, loff_t *ppos)
8399 {
8400 	ssize_t ret;
8401 	char *buf;
8402 	int r;
8403 
8404 	/* 256 should be plenty to hold the amount needed */
8405 	buf = kmalloc(256, GFP_KERNEL);
8406 	if (!buf)
8407 		return -ENOMEM;
8408 
8409 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8410 		      ftrace_update_tot_cnt,
8411 		      ftrace_number_of_pages,
8412 		      ftrace_number_of_groups);
8413 
8414 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8415 	kfree(buf);
8416 	return ret;
8417 }
8418 
8419 static const struct file_operations tracing_dyn_info_fops = {
8420 	.open		= tracing_open_generic,
8421 	.read		= tracing_read_dyn_info,
8422 	.llseek		= generic_file_llseek,
8423 };
8424 #endif /* CONFIG_DYNAMIC_FTRACE */
8425 
8426 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8427 static void
8428 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8429 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8430 		void *data)
8431 {
8432 	tracing_snapshot_instance(tr);
8433 }
8434 
8435 static void
8436 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8437 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8438 		      void *data)
8439 {
8440 	struct ftrace_func_mapper *mapper = data;
8441 	long *count = NULL;
8442 
8443 	if (mapper)
8444 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8445 
8446 	if (count) {
8447 
8448 		if (*count <= 0)
8449 			return;
8450 
8451 		(*count)--;
8452 	}
8453 
8454 	tracing_snapshot_instance(tr);
8455 }
8456 
8457 static int
8458 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8459 		      struct ftrace_probe_ops *ops, void *data)
8460 {
8461 	struct ftrace_func_mapper *mapper = data;
8462 	long *count = NULL;
8463 
8464 	seq_printf(m, "%ps:", (void *)ip);
8465 
8466 	seq_puts(m, "snapshot");
8467 
8468 	if (mapper)
8469 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8470 
8471 	if (count)
8472 		seq_printf(m, ":count=%ld\n", *count);
8473 	else
8474 		seq_puts(m, ":unlimited\n");
8475 
8476 	return 0;
8477 }
8478 
8479 static int
8480 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8481 		     unsigned long ip, void *init_data, void **data)
8482 {
8483 	struct ftrace_func_mapper *mapper = *data;
8484 
8485 	if (!mapper) {
8486 		mapper = allocate_ftrace_func_mapper();
8487 		if (!mapper)
8488 			return -ENOMEM;
8489 		*data = mapper;
8490 	}
8491 
8492 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8493 }
8494 
8495 static void
8496 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8497 		     unsigned long ip, void *data)
8498 {
8499 	struct ftrace_func_mapper *mapper = data;
8500 
8501 	if (!ip) {
8502 		if (!mapper)
8503 			return;
8504 		free_ftrace_func_mapper(mapper, NULL);
8505 		return;
8506 	}
8507 
8508 	ftrace_func_mapper_remove_ip(mapper, ip);
8509 }
8510 
8511 static struct ftrace_probe_ops snapshot_probe_ops = {
8512 	.func			= ftrace_snapshot,
8513 	.print			= ftrace_snapshot_print,
8514 };
8515 
8516 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8517 	.func			= ftrace_count_snapshot,
8518 	.print			= ftrace_snapshot_print,
8519 	.init			= ftrace_snapshot_init,
8520 	.free			= ftrace_snapshot_free,
8521 };
8522 
8523 static int
8524 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8525 			       char *glob, char *cmd, char *param, int enable)
8526 {
8527 	struct ftrace_probe_ops *ops;
8528 	void *count = (void *)-1;
8529 	char *number;
8530 	int ret;
8531 
8532 	if (!tr)
8533 		return -ENODEV;
8534 
8535 	/* hash funcs only work with set_ftrace_filter */
8536 	if (!enable)
8537 		return -EINVAL;
8538 
8539 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8540 
8541 	if (glob[0] == '!')
8542 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8543 
8544 	if (!param)
8545 		goto out_reg;
8546 
8547 	number = strsep(&param, ":");
8548 
8549 	if (!strlen(number))
8550 		goto out_reg;
8551 
8552 	/*
8553 	 * We use the callback data field (which is a pointer)
8554 	 * as our counter.
8555 	 */
8556 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8557 	if (ret)
8558 		return ret;
8559 
8560  out_reg:
8561 	ret = tracing_alloc_snapshot_instance(tr);
8562 	if (ret < 0)
8563 		goto out;
8564 
8565 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8566 
8567  out:
8568 	return ret < 0 ? ret : 0;
8569 }
8570 
8571 static struct ftrace_func_command ftrace_snapshot_cmd = {
8572 	.name			= "snapshot",
8573 	.func			= ftrace_trace_snapshot_callback,
8574 };
8575 
8576 static __init int register_snapshot_cmd(void)
8577 {
8578 	return register_ftrace_command(&ftrace_snapshot_cmd);
8579 }
8580 #else
8581 static inline __init int register_snapshot_cmd(void) { return 0; }
8582 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8583 
8584 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8585 {
8586 	if (WARN_ON(!tr->dir))
8587 		return ERR_PTR(-ENODEV);
8588 
8589 	/* Top directory uses NULL as the parent */
8590 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8591 		return NULL;
8592 
8593 	/* All sub buffers have a descriptor */
8594 	return tr->dir;
8595 }
8596 
8597 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8598 {
8599 	struct dentry *d_tracer;
8600 
8601 	if (tr->percpu_dir)
8602 		return tr->percpu_dir;
8603 
8604 	d_tracer = tracing_get_dentry(tr);
8605 	if (IS_ERR(d_tracer))
8606 		return NULL;
8607 
8608 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8609 
8610 	MEM_FAIL(!tr->percpu_dir,
8611 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8612 
8613 	return tr->percpu_dir;
8614 }
8615 
8616 static struct dentry *
8617 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8618 		      void *data, long cpu, const struct file_operations *fops)
8619 {
8620 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8621 
8622 	if (ret) /* See tracing_get_cpu() */
8623 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8624 	return ret;
8625 }
8626 
8627 static void
8628 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8629 {
8630 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8631 	struct dentry *d_cpu;
8632 	char cpu_dir[30]; /* 30 characters should be more than enough */
8633 
8634 	if (!d_percpu)
8635 		return;
8636 
8637 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8638 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8639 	if (!d_cpu) {
8640 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8641 		return;
8642 	}
8643 
8644 	/* per cpu trace_pipe */
8645 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8646 				tr, cpu, &tracing_pipe_fops);
8647 
8648 	/* per cpu trace */
8649 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8650 				tr, cpu, &tracing_fops);
8651 
8652 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8653 				tr, cpu, &tracing_buffers_fops);
8654 
8655 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8656 				tr, cpu, &tracing_stats_fops);
8657 
8658 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8659 				tr, cpu, &tracing_entries_fops);
8660 
8661 #ifdef CONFIG_TRACER_SNAPSHOT
8662 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8663 				tr, cpu, &snapshot_fops);
8664 
8665 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8666 				tr, cpu, &snapshot_raw_fops);
8667 #endif
8668 }
8669 
8670 #ifdef CONFIG_FTRACE_SELFTEST
8671 /* Let selftest have access to static functions in this file */
8672 #include "trace_selftest.c"
8673 #endif
8674 
8675 static ssize_t
8676 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8677 			loff_t *ppos)
8678 {
8679 	struct trace_option_dentry *topt = filp->private_data;
8680 	char *buf;
8681 
8682 	if (topt->flags->val & topt->opt->bit)
8683 		buf = "1\n";
8684 	else
8685 		buf = "0\n";
8686 
8687 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8688 }
8689 
8690 static ssize_t
8691 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8692 			 loff_t *ppos)
8693 {
8694 	struct trace_option_dentry *topt = filp->private_data;
8695 	unsigned long val;
8696 	int ret;
8697 
8698 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8699 	if (ret)
8700 		return ret;
8701 
8702 	if (val != 0 && val != 1)
8703 		return -EINVAL;
8704 
8705 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8706 		mutex_lock(&trace_types_lock);
8707 		ret = __set_tracer_option(topt->tr, topt->flags,
8708 					  topt->opt, !val);
8709 		mutex_unlock(&trace_types_lock);
8710 		if (ret)
8711 			return ret;
8712 	}
8713 
8714 	*ppos += cnt;
8715 
8716 	return cnt;
8717 }
8718 
8719 
8720 static const struct file_operations trace_options_fops = {
8721 	.open = tracing_open_generic,
8722 	.read = trace_options_read,
8723 	.write = trace_options_write,
8724 	.llseek	= generic_file_llseek,
8725 };
8726 
8727 /*
8728  * In order to pass in both the trace_array descriptor as well as the index
8729  * to the flag that the trace option file represents, the trace_array
8730  * has a character array of trace_flags_index[], which holds the index
8731  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8732  * The address of this character array is passed to the flag option file
8733  * read/write callbacks.
8734  *
8735  * In order to extract both the index and the trace_array descriptor,
8736  * get_tr_index() uses the following algorithm.
8737  *
8738  *   idx = *ptr;
8739  *
8740  * As the pointer itself contains the address of the index (remember
8741  * index[1] == 1).
8742  *
8743  * Then to get the trace_array descriptor, by subtracting that index
8744  * from the ptr, we get to the start of the index itself.
8745  *
8746  *   ptr - idx == &index[0]
8747  *
8748  * Then a simple container_of() from that pointer gets us to the
8749  * trace_array descriptor.
8750  */
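/*
 * Worked example: if @data points at trace_flags_index[3] (whose value is
 * 3), then *data yields idx == 3, data - 3 points back at
 * trace_flags_index[0], and container_of() from there recovers the
 * enclosing trace_array.
 */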
8751 static void get_tr_index(void *data, struct trace_array **ptr,
8752 			 unsigned int *pindex)
8753 {
8754 	*pindex = *(unsigned char *)data;
8755 
8756 	*ptr = container_of(data - *pindex, struct trace_array,
8757 			    trace_flags_index);
8758 }
8759 
8760 static ssize_t
8761 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8762 			loff_t *ppos)
8763 {
8764 	void *tr_index = filp->private_data;
8765 	struct trace_array *tr;
8766 	unsigned int index;
8767 	char *buf;
8768 
8769 	get_tr_index(tr_index, &tr, &index);
8770 
8771 	if (tr->trace_flags & (1 << index))
8772 		buf = "1\n";
8773 	else
8774 		buf = "0\n";
8775 
8776 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8777 }
8778 
8779 static ssize_t
8780 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8781 			 loff_t *ppos)
8782 {
8783 	void *tr_index = filp->private_data;
8784 	struct trace_array *tr;
8785 	unsigned int index;
8786 	unsigned long val;
8787 	int ret;
8788 
8789 	get_tr_index(tr_index, &tr, &index);
8790 
8791 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8792 	if (ret)
8793 		return ret;
8794 
8795 	if (val != 0 && val != 1)
8796 		return -EINVAL;
8797 
8798 	mutex_lock(&event_mutex);
8799 	mutex_lock(&trace_types_lock);
8800 	ret = set_tracer_flag(tr, 1 << index, val);
8801 	mutex_unlock(&trace_types_lock);
8802 	mutex_unlock(&event_mutex);
8803 
8804 	if (ret < 0)
8805 		return ret;
8806 
8807 	*ppos += cnt;
8808 
8809 	return cnt;
8810 }
8811 
8812 static const struct file_operations trace_options_core_fops = {
8813 	.open = tracing_open_generic,
8814 	.read = trace_options_core_read,
8815 	.write = trace_options_core_write,
8816 	.llseek = generic_file_llseek,
8817 };
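/*
 * Note the split: trace_options_fops above handles the tracer-specific
 * options through __set_tracer_option(), while these "core" files toggle
 * the generic bits in tr->trace_flags through set_tracer_flag().  Both
 * kinds of file end up in the same "options" directory created below.
 */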
8818 
8819 struct dentry *trace_create_file(const char *name,
8820 				 umode_t mode,
8821 				 struct dentry *parent,
8822 				 void *data,
8823 				 const struct file_operations *fops)
8824 {
8825 	struct dentry *ret;
8826 
8827 	ret = tracefs_create_file(name, mode, parent, data, fops);
8828 	if (!ret)
8829 		pr_warn("Could not create tracefs '%s' entry\n", name);
8830 
8831 	return ret;
8832 }
8833 
8834 
8835 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8836 {
8837 	struct dentry *d_tracer;
8838 
8839 	if (tr->options)
8840 		return tr->options;
8841 
8842 	d_tracer = tracing_get_dentry(tr);
8843 	if (IS_ERR(d_tracer))
8844 		return NULL;
8845 
8846 	tr->options = tracefs_create_dir("options", d_tracer);
8847 	if (!tr->options) {
8848 		pr_warn("Could not create tracefs directory 'options'\n");
8849 		return NULL;
8850 	}
8851 
8852 	return tr->options;
8853 }
8854 
8855 static void
8856 create_trace_option_file(struct trace_array *tr,
8857 			 struct trace_option_dentry *topt,
8858 			 struct tracer_flags *flags,
8859 			 struct tracer_opt *opt)
8860 {
8861 	struct dentry *t_options;
8862 
8863 	t_options = trace_options_init_dentry(tr);
8864 	if (!t_options)
8865 		return;
8866 
8867 	topt->flags = flags;
8868 	topt->opt = opt;
8869 	topt->tr = tr;
8870 
8871 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8872 					t_options, topt, &trace_options_fops);
8873 
8874 }
8875 
8876 static void
8877 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8878 {
8879 	struct trace_option_dentry *topts;
8880 	struct trace_options *tr_topts;
8881 	struct tracer_flags *flags;
8882 	struct tracer_opt *opts;
8883 	int cnt;
8884 	int i;
8885 
8886 	if (!tracer)
8887 		return;
8888 
8889 	flags = tracer->flags;
8890 
8891 	if (!flags || !flags->opts)
8892 		return;
8893 
8894 	/*
8895 	 * If this is an instance, only create flags for tracers
8896 	 * the instance may have.
8897 	 */
8898 	if (!trace_ok_for_array(tracer, tr))
8899 		return;
8900 
8901 	for (i = 0; i < tr->nr_topts; i++) {
8902 		/* Make sure there are no duplicate flags. */
8903 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8904 			return;
8905 	}
8906 
8907 	opts = flags->opts;
8908 
8909 	for (cnt = 0; opts[cnt].name; cnt++)
8910 		;
8911 
8912 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8913 	if (!topts)
8914 		return;
8915 
8916 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8917 			    GFP_KERNEL);
8918 	if (!tr_topts) {
8919 		kfree(topts);
8920 		return;
8921 	}
8922 
8923 	tr->topts = tr_topts;
8924 	tr->topts[tr->nr_topts].tracer = tracer;
8925 	tr->topts[tr->nr_topts].topts = topts;
8926 	tr->nr_topts++;
8927 
8928 	for (cnt = 0; opts[cnt].name; cnt++) {
8929 		create_trace_option_file(tr, &topts[cnt], flags,
8930 					 &opts[cnt]);
8931 		MEM_FAIL(topts[cnt].entry == NULL,
8932 			  "Failed to create trace option: %s",
8933 			  opts[cnt].name);
8934 	}
8935 }
8936 
8937 static struct dentry *
8938 create_trace_option_core_file(struct trace_array *tr,
8939 			      const char *option, long index)
8940 {
8941 	struct dentry *t_options;
8942 
8943 	t_options = trace_options_init_dentry(tr);
8944 	if (!t_options)
8945 		return NULL;
8946 
8947 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8948 				 (void *)&tr->trace_flags_index[index],
8949 				 &trace_options_core_fops);
8950 }
8951 
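/*
 * Create one file per generic trace option.  Options marked in
 * TOP_LEVEL_TRACE_FLAGS only make sense for the top level trace array;
 * the condition below creates a file either when this is the top level
 * instance or when the flag is not one of the top-level-only ones.
 */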
8952 static void create_trace_options_dir(struct trace_array *tr)
8953 {
8954 	struct dentry *t_options;
8955 	bool top_level = tr == &global_trace;
8956 	int i;
8957 
8958 	t_options = trace_options_init_dentry(tr);
8959 	if (!t_options)
8960 		return;
8961 
8962 	for (i = 0; trace_options[i]; i++) {
8963 		if (top_level ||
8964 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8965 			create_trace_option_core_file(tr, trace_options[i], i);
8966 	}
8967 }
8968 
8969 static ssize_t
8970 rb_simple_read(struct file *filp, char __user *ubuf,
8971 	       size_t cnt, loff_t *ppos)
8972 {
8973 	struct trace_array *tr = filp->private_data;
8974 	char buf[64];
8975 	int r;
8976 
8977 	r = tracer_tracing_is_on(tr);
8978 	r = sprintf(buf, "%d\n", r);
8979 
8980 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8981 }
8982 
8983 static ssize_t
8984 rb_simple_write(struct file *filp, const char __user *ubuf,
8985 		size_t cnt, loff_t *ppos)
8986 {
8987 	struct trace_array *tr = filp->private_data;
8988 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8989 	unsigned long val;
8990 	int ret;
8991 
8992 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8993 	if (ret)
8994 		return ret;
8995 
8996 	if (buffer) {
8997 		mutex_lock(&trace_types_lock);
8998 		if (!!val == tracer_tracing_is_on(tr)) {
8999 			val = 0; /* do nothing */
9000 		} else if (val) {
9001 			tracer_tracing_on(tr);
9002 			if (tr->current_trace->start)
9003 				tr->current_trace->start(tr);
9004 		} else {
9005 			tracer_tracing_off(tr);
9006 			if (tr->current_trace->stop)
9007 				tr->current_trace->stop(tr);
9008 		}
9009 		mutex_unlock(&trace_types_lock);
9010 	}
9011 
9012 	(*ppos)++;
9013 
9014 	return cnt;
9015 }
9016 
9017 static const struct file_operations rb_simple_fops = {
9018 	.open		= tracing_open_generic_tr,
9019 	.read		= rb_simple_read,
9020 	.write		= rb_simple_write,
9021 	.release	= tracing_release_generic_tr,
9022 	.llseek		= default_llseek,
9023 };
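/*
 * These operations back the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below.  A minimal sketch of its use from user
 * space (assuming the usual tracefs mount point):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    (pause recording)
 *   echo 1 > /sys/kernel/tracing/tracing_on    (resume recording)
 *
 * A write also invokes the current tracer's ->start()/->stop() callbacks,
 * as rb_simple_write() above shows.
 */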
9024 
9025 static ssize_t
9026 buffer_percent_read(struct file *filp, char __user *ubuf,
9027 		    size_t cnt, loff_t *ppos)
9028 {
9029 	struct trace_array *tr = filp->private_data;
9030 	char buf[64];
9031 	int r;
9032 
9033 	r = tr->buffer_percent;
9034 	r = sprintf(buf, "%d\n", r);
9035 
9036 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9037 }
9038 
9039 static ssize_t
9040 buffer_percent_write(struct file *filp, const char __user *ubuf,
9041 		     size_t cnt, loff_t *ppos)
9042 {
9043 	struct trace_array *tr = filp->private_data;
9044 	unsigned long val;
9045 	int ret;
9046 
9047 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9048 	if (ret)
9049 		return ret;
9050 
9051 	if (val > 100)
9052 		return -EINVAL;
9053 
9054 	if (!val)
9055 		val = 1;
9056 
9057 	tr->buffer_percent = val;
9058 
9059 	(*ppos)++;
9060 
9061 	return cnt;
9062 }
9063 
9064 static const struct file_operations buffer_percent_fops = {
9065 	.open		= tracing_open_generic_tr,
9066 	.read		= buffer_percent_read,
9067 	.write		= buffer_percent_write,
9068 	.release	= tracing_release_generic_tr,
9069 	.llseek		= default_llseek,
9070 };
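/*
 * "buffer_percent" (created below, default 50) is meant to express how
 * full the ring buffer should be before blocked readers are woken; the
 * write handler above rejects values above 100 and turns 0 into 1.  The
 * actual watermark handling lives in the ring buffer code, not here.
 */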
9071 
9072 static struct dentry *trace_instance_dir;
9073 
9074 static void
9075 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9076 
9077 static int
9078 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9079 {
9080 	enum ring_buffer_flags rb_flags;
9081 
9082 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9083 
9084 	buf->tr = tr;
9085 
9086 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9087 	if (!buf->buffer)
9088 		return -ENOMEM;
9089 
9090 	buf->data = alloc_percpu(struct trace_array_cpu);
9091 	if (!buf->data) {
9092 		ring_buffer_free(buf->buffer);
9093 		buf->buffer = NULL;
9094 		return -ENOMEM;
9095 	}
9096 
9097 	/* Allocate the first page for all buffers */
9098 	set_buffer_entries(&tr->array_buffer,
9099 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9100 
9101 	return 0;
9102 }
9103 
9104 static int allocate_trace_buffers(struct trace_array *tr, int size)
9105 {
9106 	int ret;
9107 
9108 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9109 	if (ret)
9110 		return ret;
9111 
9112 #ifdef CONFIG_TRACER_MAX_TRACE
9113 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9114 				    allocate_snapshot ? size : 1);
9115 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9116 		ring_buffer_free(tr->array_buffer.buffer);
9117 		tr->array_buffer.buffer = NULL;
9118 		free_percpu(tr->array_buffer.data);
9119 		tr->array_buffer.data = NULL;
9120 		return -ENOMEM;
9121 	}
9122 	tr->allocated_snapshot = allocate_snapshot;
9123 
9124 	/*
9125 	 * Only the top level trace array gets its snapshot allocated
9126 	 * from the kernel command line.
9127 	 */
9128 	allocate_snapshot = false;
9129 #endif
9130 
9131 	return 0;
9132 }
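/*
 * When snapshots are not requested at boot, the max/snapshot buffer is
 * allocated above with a token size of 1, i.e. the smallest buffer the
 * ring buffer code will hand out; it is only grown to the full size when
 * the snapshot feature is actually used (handled elsewhere in this file).
 */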
9133 
9134 static void free_trace_buffer(struct array_buffer *buf)
9135 {
9136 	if (buf->buffer) {
9137 		ring_buffer_free(buf->buffer);
9138 		buf->buffer = NULL;
9139 		free_percpu(buf->data);
9140 		buf->data = NULL;
9141 	}
9142 }
9143 
9144 static void free_trace_buffers(struct trace_array *tr)
9145 {
9146 	if (!tr)
9147 		return;
9148 
9149 	free_trace_buffer(&tr->array_buffer);
9150 
9151 #ifdef CONFIG_TRACER_MAX_TRACE
9152 	free_trace_buffer(&tr->max_buffer);
9153 #endif
9154 }
9155 
9156 static void init_trace_flags_index(struct trace_array *tr)
9157 {
9158 	int i;
9159 
9160 	/* Used by the trace options files */
9161 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9162 		tr->trace_flags_index[i] = i;
9163 }
9164 
9165 static void __update_tracer_options(struct trace_array *tr)
9166 {
9167 	struct tracer *t;
9168 
9169 	for (t = trace_types; t; t = t->next)
9170 		add_tracer_options(tr, t);
9171 }
9172 
9173 static void update_tracer_options(struct trace_array *tr)
9174 {
9175 	mutex_lock(&trace_types_lock);
9176 	tracer_options_updated = true;
9177 	__update_tracer_options(tr);
9178 	mutex_unlock(&trace_types_lock);
9179 }
9180 
9181 /* Must have trace_types_lock held */
9182 struct trace_array *trace_array_find(const char *instance)
9183 {
9184 	struct trace_array *tr, *found = NULL;
9185 
9186 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9187 		if (tr->name && strcmp(tr->name, instance) == 0) {
9188 			found = tr;
9189 			break;
9190 		}
9191 	}
9192 
9193 	return found;
9194 }
9195 
9196 struct trace_array *trace_array_find_get(const char *instance)
9197 {
9198 	struct trace_array *tr;
9199 
9200 	mutex_lock(&trace_types_lock);
9201 	tr = trace_array_find(instance);
9202 	if (tr)
9203 		tr->ref++;
9204 	mutex_unlock(&trace_types_lock);
9205 
9206 	return tr;
9207 }
9208 
9209 static int trace_array_create_dir(struct trace_array *tr)
9210 {
9211 	int ret;
9212 
9213 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9214 	if (!tr->dir)
9215 		return -EINVAL;
9216 
9217 	ret = event_trace_add_tracer(tr->dir, tr);
9218 	if (ret) {
9219 		tracefs_remove(tr->dir);
9220 		return ret;
9221 	}
9222 
9223 	init_tracer_tracefs(tr, tr->dir);
9224 	__update_tracer_options(tr);
9225 
9226 	return ret;
9227 }
9228 
9229 static struct trace_array *trace_array_create(const char *name)
9230 {
9231 	struct trace_array *tr;
9232 	int ret;
9233 
9234 	ret = -ENOMEM;
9235 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9236 	if (!tr)
9237 		return ERR_PTR(ret);
9238 
9239 	tr->name = kstrdup(name, GFP_KERNEL);
9240 	if (!tr->name)
9241 		goto out_free_tr;
9242 
9243 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9244 		goto out_free_tr;
9245 
9246 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9247 
9248 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9249 
9250 	raw_spin_lock_init(&tr->start_lock);
9251 
9252 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9253 
9254 	tr->current_trace = &nop_trace;
9255 
9256 	INIT_LIST_HEAD(&tr->systems);
9257 	INIT_LIST_HEAD(&tr->events);
9258 	INIT_LIST_HEAD(&tr->hist_vars);
9259 	INIT_LIST_HEAD(&tr->err_log);
9260 
9261 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9262 		goto out_free_tr;
9263 
9264 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9265 		goto out_free_tr;
9266 
9267 	ftrace_init_trace_array(tr);
9268 
9269 	init_trace_flags_index(tr);
9270 
9271 	if (trace_instance_dir) {
9272 		ret = trace_array_create_dir(tr);
9273 		if (ret)
9274 			goto out_free_tr;
9275 	} else
9276 		__trace_early_add_events(tr);
9277 
9278 	list_add(&tr->list, &ftrace_trace_arrays);
9279 
9280 	tr->ref++;
9281 
9282 	return tr;
9283 
9284  out_free_tr:
9285 	ftrace_free_ftrace_ops(tr);
9286 	free_trace_buffers(tr);
9287 	free_cpumask_var(tr->tracing_cpumask);
9288 	kfree(tr->name);
9289 	kfree(tr);
9290 
9291 	return ERR_PTR(ret);
9292 }
9293 
9294 static int instance_mkdir(const char *name)
9295 {
9296 	struct trace_array *tr;
9297 	int ret;
9298 
9299 	mutex_lock(&event_mutex);
9300 	mutex_lock(&trace_types_lock);
9301 
9302 	ret = -EEXIST;
9303 	if (trace_array_find(name))
9304 		goto out_unlock;
9305 
9306 	tr = trace_array_create(name);
9307 
9308 	ret = PTR_ERR_OR_ZERO(tr);
9309 
9310 out_unlock:
9311 	mutex_unlock(&trace_types_lock);
9312 	mutex_unlock(&event_mutex);
9313 	return ret;
9314 }
9315 
9316 /**
9317  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9318  * @name: The name of the trace array to be looked up/created.
9319  *
9320  * Returns a pointer to the trace array with the given name, or
9321  * NULL if it cannot be created.
9322  *
9323  * NOTE: This function increments the reference counter associated with the
9324  * trace array returned. This makes sure it cannot be freed while in use.
9325  * Use trace_array_put() once the trace array is no longer needed.
9326  * If the trace_array is to be freed, trace_array_destroy() needs to
9327  * be called after the trace_array_put(), or simply let user space delete
9328  * it from the tracefs instances directory. But until the
9329  * trace_array_put() is called, user space cannot delete it.
9330  *
9331  */
9332 struct trace_array *trace_array_get_by_name(const char *name)
9333 {
9334 	struct trace_array *tr;
9335 
9336 	mutex_lock(&event_mutex);
9337 	mutex_lock(&trace_types_lock);
9338 
9339 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9340 		if (tr->name && strcmp(tr->name, name) == 0)
9341 			goto out_unlock;
9342 	}
9343 
9344 	tr = trace_array_create(name);
9345 
9346 	if (IS_ERR(tr))
9347 		tr = NULL;
9348 out_unlock:
9349 	if (tr)
9350 		tr->ref++;
9351 
9352 	mutex_unlock(&trace_types_lock);
9353 	mutex_unlock(&event_mutex);
9354 	return tr;
9355 }
9356 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
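/*
 * A minimal usage sketch for this export (error handling elided; the
 * instance name is just an example):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *   ... use tr with the other trace_array_*() helpers ...
 *
 *   trace_array_put(tr);
 *   trace_array_destroy(tr);    (only if the instance should be removed)
 *
 * As the kernel-doc above spells out, the reference taken here must be
 * dropped with trace_array_put(), and trace_array_destroy() may only be
 * called after that.
 */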
9357 
9358 static int __remove_instance(struct trace_array *tr)
9359 {
9360 	int i;
9361 
9362 	/* Reference counter for a newly created trace array = 1. */
9363 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9364 		return -EBUSY;
9365 
9366 	list_del(&tr->list);
9367 
9368 	/* Disable all the flags that were enabled coming in */
9369 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9370 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9371 			set_tracer_flag(tr, 1 << i, 0);
9372 	}
9373 
9374 	tracing_set_nop(tr);
9375 	clear_ftrace_function_probes(tr);
9376 	event_trace_del_tracer(tr);
9377 	ftrace_clear_pids(tr);
9378 	ftrace_destroy_function_files(tr);
9379 	tracefs_remove(tr->dir);
9380 	free_percpu(tr->last_func_repeats);
9381 	free_trace_buffers(tr);
9382 
9383 	for (i = 0; i < tr->nr_topts; i++) {
9384 		kfree(tr->topts[i].topts);
9385 	}
9386 	kfree(tr->topts);
9387 
9388 	free_cpumask_var(tr->tracing_cpumask);
9389 	kfree(tr->name);
9390 	kfree(tr);
9391 
9392 	return 0;
9393 }
9394 
9395 int trace_array_destroy(struct trace_array *this_tr)
9396 {
9397 	struct trace_array *tr;
9398 	int ret;
9399 
9400 	if (!this_tr)
9401 		return -EINVAL;
9402 
9403 	mutex_lock(&event_mutex);
9404 	mutex_lock(&trace_types_lock);
9405 
9406 	ret = -ENODEV;
9407 
9408 	/* Make sure the trace array exists before destroying it. */
9409 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9410 		if (tr == this_tr) {
9411 			ret = __remove_instance(tr);
9412 			break;
9413 		}
9414 	}
9415 
9416 	mutex_unlock(&trace_types_lock);
9417 	mutex_unlock(&event_mutex);
9418 
9419 	return ret;
9420 }
9421 EXPORT_SYMBOL_GPL(trace_array_destroy);
9422 
9423 static int instance_rmdir(const char *name)
9424 {
9425 	struct trace_array *tr;
9426 	int ret;
9427 
9428 	mutex_lock(&event_mutex);
9429 	mutex_lock(&trace_types_lock);
9430 
9431 	ret = -ENODEV;
9432 	tr = trace_array_find(name);
9433 	if (tr)
9434 		ret = __remove_instance(tr);
9435 
9436 	mutex_unlock(&trace_types_lock);
9437 	mutex_unlock(&event_mutex);
9438 
9439 	return ret;
9440 }
9441 
9442 static __init void create_trace_instances(struct dentry *d_tracer)
9443 {
9444 	struct trace_array *tr;
9445 
9446 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9447 							 instance_mkdir,
9448 							 instance_rmdir);
9449 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9450 		return;
9451 
9452 	mutex_lock(&event_mutex);
9453 	mutex_lock(&trace_types_lock);
9454 
9455 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9456 		if (!tr->name)
9457 			continue;
9458 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9459 			     "Failed to create instance directory\n"))
9460 			break;
9461 	}
9462 
9463 	mutex_unlock(&trace_types_lock);
9464 	mutex_unlock(&event_mutex);
9465 }
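/*
 * With the "instances" directory registered above, user space creates and
 * removes trace instances with plain mkdir/rmdir, e.g. (assuming the usual
 * tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 *
 * which land in instance_mkdir() and instance_rmdir() respectively.
 */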
9466 
9467 static void
9468 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9469 {
9470 	struct trace_event_file *file;
9471 	int cpu;
9472 
9473 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9474 			tr, &show_traces_fops);
9475 
9476 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9477 			tr, &set_tracer_fops);
9478 
9479 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9480 			  tr, &tracing_cpumask_fops);
9481 
9482 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9483 			  tr, &tracing_iter_fops);
9484 
9485 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9486 			  tr, &tracing_fops);
9487 
9488 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9489 			  tr, &tracing_pipe_fops);
9490 
9491 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9492 			  tr, &tracing_entries_fops);
9493 
9494 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9495 			  tr, &tracing_total_entries_fops);
9496 
9497 	trace_create_file("free_buffer", 0200, d_tracer,
9498 			  tr, &tracing_free_buffer_fops);
9499 
9500 	trace_create_file("trace_marker", 0220, d_tracer,
9501 			  tr, &tracing_mark_fops);
9502 
9503 	file = __find_event_file(tr, "ftrace", "print");
9504 	if (file && file->dir)
9505 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9506 				  file, &event_trigger_fops);
9507 	tr->trace_marker_file = file;
9508 
9509 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9510 			  tr, &tracing_mark_raw_fops);
9511 
9512 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9513 			  &trace_clock_fops);
9514 
9515 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9516 			  tr, &rb_simple_fops);
9517 
9518 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9519 			  &trace_time_stamp_mode_fops);
9520 
9521 	tr->buffer_percent = 50;
9522 
9523 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9524 			tr, &buffer_percent_fops);
9525 
9526 	create_trace_options_dir(tr);
9527 
9528 	trace_create_maxlat_file(tr, d_tracer);
9529 
9530 	if (ftrace_create_function_files(tr, d_tracer))
9531 		MEM_FAIL(1, "Could not allocate function filter files");
9532 
9533 #ifdef CONFIG_TRACER_SNAPSHOT
9534 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9535 			  tr, &snapshot_fops);
9536 #endif
9537 
9538 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9539 			  tr, &tracing_err_log_fops);
9540 
9541 	for_each_tracing_cpu(cpu)
9542 		tracing_init_tracefs_percpu(tr, cpu);
9543 
9544 	ftrace_init_tracefs(tr, d_tracer);
9545 }
9546 
9547 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9548 {
9549 	struct vfsmount *mnt;
9550 	struct file_system_type *type;
9551 
9552 	/*
9553 	 * To maintain backward compatibility for tools that mount
9554 	 * debugfs to get to the tracing facility, tracefs is automatically
9555 	 * mounted to the debugfs/tracing directory.
9556 	 */
9557 	type = get_fs_type("tracefs");
9558 	if (!type)
9559 		return NULL;
9560 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9561 	put_filesystem(type);
9562 	if (IS_ERR(mnt))
9563 		return NULL;
9564 	mntget(mnt);
9565 
9566 	return mnt;
9567 }
9568 
9569 /**
9570  * tracing_init_dentry - initialize top level trace array
9571  *
9572  * This is called when creating files or directories in the tracing
9573  * directory. It is called via fs_initcall() by any of the boot up code
9574  * and expects to return the dentry of the top level tracing directory.
9575  */
9576 int tracing_init_dentry(void)
9577 {
9578 	struct trace_array *tr = &global_trace;
9579 
9580 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9581 		pr_warn("Tracing disabled due to lockdown\n");
9582 		return -EPERM;
9583 	}
9584 
9585 	/* The top level trace array uses NULL as parent */
9586 	if (tr->dir)
9587 		return 0;
9588 
9589 	if (WARN_ON(!tracefs_initialized()))
9590 		return -ENODEV;
9591 
9592 	/*
9593 	 * As there may still be users that expect the tracing
9594 	 * files to exist in debugfs/tracing, we must automount
9595 	 * the tracefs file system there, so older tools still
9596 	 * work with the newer kernel.
9597 	 */
9598 	tr->dir = debugfs_create_automount("tracing", NULL,
9599 					   trace_automount, NULL);
9600 
9601 	return 0;
9602 }
9603 
9604 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9605 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
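/*
 * The linker section bounded by the two symbols above holds the eval
 * (enum/sizeof) maps compiled into the kernel image.  trace_insert_eval_map()
 * folds them into the tracing core so that symbolic names used in event
 * print formats can be resolved; doing it from the workqueue below keeps
 * the boot path from stalling on the insertion work.
 */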
9606 
9607 static struct workqueue_struct *eval_map_wq __initdata;
9608 static struct work_struct eval_map_work __initdata;
9609 static struct work_struct tracerfs_init_work __initdata;
9610 
9611 static void __init eval_map_work_func(struct work_struct *work)
9612 {
9613 	int len;
9614 
9615 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9616 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9617 }
9618 
9619 static int __init trace_eval_init(void)
9620 {
9621 	INIT_WORK(&eval_map_work, eval_map_work_func);
9622 
9623 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9624 	if (!eval_map_wq) {
9625 		pr_err("Unable to allocate eval_map_wq\n");
9626 		/* Do work here */
9627 		eval_map_work_func(&eval_map_work);
9628 		return -ENOMEM;
9629 	}
9630 
9631 	queue_work(eval_map_wq, &eval_map_work);
9632 	return 0;
9633 }
9634 
9635 subsys_initcall(trace_eval_init);
9636 
9637 static int __init trace_eval_sync(void)
9638 {
9639 	/* Make sure the eval map updates are finished */
9640 	if (eval_map_wq)
9641 		destroy_workqueue(eval_map_wq);
9642 	return 0;
9643 }
9644 
9645 late_initcall_sync(trace_eval_sync);
9646 
9647 
9648 #ifdef CONFIG_MODULES
9649 static void trace_module_add_evals(struct module *mod)
9650 {
9651 	if (!mod->num_trace_evals)
9652 		return;
9653 
9654 	/*
9655 	 * Modules with bad taint do not have events created; do
9656 	 * not bother with their enums either.
9657 	 */
9658 	if (trace_module_has_bad_taint(mod))
9659 		return;
9660 
9661 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9662 }
9663 
9664 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9665 static void trace_module_remove_evals(struct module *mod)
9666 {
9667 	union trace_eval_map_item *map;
9668 	union trace_eval_map_item **last = &trace_eval_maps;
9669 
9670 	if (!mod->num_trace_evals)
9671 		return;
9672 
9673 	mutex_lock(&trace_eval_mutex);
9674 
9675 	map = trace_eval_maps;
9676 
9677 	while (map) {
9678 		if (map->head.mod == mod)
9679 			break;
9680 		map = trace_eval_jmp_to_tail(map);
9681 		last = &map->tail.next;
9682 		map = map->tail.next;
9683 	}
9684 	if (!map)
9685 		goto out;
9686 
9687 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9688 	kfree(map);
9689  out:
9690 	mutex_unlock(&trace_eval_mutex);
9691 }
9692 #else
9693 static inline void trace_module_remove_evals(struct module *mod) { }
9694 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9695 
9696 static int trace_module_notify(struct notifier_block *self,
9697 			       unsigned long val, void *data)
9698 {
9699 	struct module *mod = data;
9700 
9701 	switch (val) {
9702 	case MODULE_STATE_COMING:
9703 		trace_module_add_evals(mod);
9704 		break;
9705 	case MODULE_STATE_GOING:
9706 		trace_module_remove_evals(mod);
9707 		break;
9708 	}
9709 
9710 	return NOTIFY_OK;
9711 }
9712 
9713 static struct notifier_block trace_module_nb = {
9714 	.notifier_call = trace_module_notify,
9715 	.priority = 0,
9716 };
9717 #endif /* CONFIG_MODULES */
9718 
9719 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9720 {
9721 
9722 	event_trace_init();
9723 
9724 	init_tracer_tracefs(&global_trace, NULL);
9725 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9726 
9727 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9728 			&global_trace, &tracing_thresh_fops);
9729 
9730 	trace_create_file("README", TRACE_MODE_READ, NULL,
9731 			NULL, &tracing_readme_fops);
9732 
9733 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9734 			NULL, &tracing_saved_cmdlines_fops);
9735 
9736 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9737 			  NULL, &tracing_saved_cmdlines_size_fops);
9738 
9739 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9740 			NULL, &tracing_saved_tgids_fops);
9741 
9742 	trace_create_eval_file(NULL);
9743 
9744 #ifdef CONFIG_MODULES
9745 	register_module_notifier(&trace_module_nb);
9746 #endif
9747 
9748 #ifdef CONFIG_DYNAMIC_FTRACE
9749 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9750 			NULL, &tracing_dyn_info_fops);
9751 #endif
9752 
9753 	create_trace_instances(NULL);
9754 
9755 	update_tracer_options(&global_trace);
9756 }
9757 
9758 static __init int tracer_init_tracefs(void)
9759 {
9760 	int ret;
9761 
9762 	trace_access_lock_init();
9763 
9764 	ret = tracing_init_dentry();
9765 	if (ret)
9766 		return 0;
9767 
9768 	if (eval_map_wq) {
9769 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9770 		queue_work(eval_map_wq, &tracerfs_init_work);
9771 	} else {
9772 		tracer_init_tracefs_work_func(NULL);
9773 	}
9774 
9775 	return 0;
9776 }
9777 
9778 fs_initcall(tracer_init_tracefs);
9779 
9780 static int trace_panic_handler(struct notifier_block *this,
9781 			       unsigned long event, void *unused)
9782 {
9783 	if (ftrace_dump_on_oops)
9784 		ftrace_dump(ftrace_dump_on_oops);
9785 	return NOTIFY_OK;
9786 }
9787 
9788 static struct notifier_block trace_panic_notifier = {
9789 	.notifier_call  = trace_panic_handler,
9790 	.next           = NULL,
9791 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9792 };
9793 
9794 static int trace_die_handler(struct notifier_block *self,
9795 			     unsigned long val,
9796 			     void *data)
9797 {
9798 	switch (val) {
9799 	case DIE_OOPS:
9800 		if (ftrace_dump_on_oops)
9801 			ftrace_dump(ftrace_dump_on_oops);
9802 		break;
9803 	default:
9804 		break;
9805 	}
9806 	return NOTIFY_OK;
9807 }
9808 
9809 static struct notifier_block trace_die_notifier = {
9810 	.notifier_call = trace_die_handler,
9811 	.priority = 200
9812 };
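/*
 * Both notifiers above funnel into ftrace_dump(), gated by the
 * ftrace_dump_on_oops setting (normally controlled through the
 * "ftrace_dump_on_oops" kernel command line parameter or sysctl), so the
 * trace buffer can be dumped to the console on a panic or an oops.
 */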
9813 
9814 /*
9815  * printk is limited to a max of 1024 characters; we really don't need it that big.
9816  * Nothing should be printing 1000 characters anyway.
9817  */
9818 #define TRACE_MAX_PRINT		1000
9819 
9820 /*
9821  * Define here KERN_TRACE so that we have one place to modify
9822  * it if we decide to change what log level the ftrace dump
9823  * should be at.
9824  */
9825 #define KERN_TRACE		KERN_EMERG
9826 
9827 void
9828 trace_printk_seq(struct trace_seq *s)
9829 {
9830 	/* Probably should print a warning here. */
9831 	if (s->seq.len >= TRACE_MAX_PRINT)
9832 		s->seq.len = TRACE_MAX_PRINT;
9833 
9834 	/*
9835 	 * More paranoid code. Although the buffer size is set to
9836 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9837 	 * an extra layer of protection.
9838 	 */
9839 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9840 		s->seq.len = s->seq.size - 1;
9841 
9842 	/* should be zero terminated, but we are paranoid. */
9843 	s->buffer[s->seq.len] = 0;
9844 
9845 	printk(KERN_TRACE "%s", s->buffer);
9846 
9847 	trace_seq_init(s);
9848 }
9849 
9850 void trace_init_global_iter(struct trace_iterator *iter)
9851 {
9852 	iter->tr = &global_trace;
9853 	iter->trace = iter->tr->current_trace;
9854 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9855 	iter->array_buffer = &global_trace.array_buffer;
9856 
9857 	if (iter->trace && iter->trace->open)
9858 		iter->trace->open(iter);
9859 
9860 	/* Annotate start of buffers if we had overruns */
9861 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9862 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9863 
9864 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9865 	if (trace_clocks[iter->tr->clock_id].in_ns)
9866 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9867 
9868 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9869 	iter->temp = static_temp_buf;
9870 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
9871 	iter->fmt = static_fmt_buf;
9872 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
9873 }
9874 
9875 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9876 {
9877 	/* use static because iter can be a bit big for the stack */
9878 	static struct trace_iterator iter;
9879 	static atomic_t dump_running;
9880 	struct trace_array *tr = &global_trace;
9881 	unsigned int old_userobj;
9882 	unsigned long flags;
9883 	int cnt = 0, cpu;
9884 
9885 	/* Only allow one dump user at a time. */
9886 	if (atomic_inc_return(&dump_running) != 1) {
9887 		atomic_dec(&dump_running);
9888 		return;
9889 	}
9890 
9891 	/*
9892 	 * Always turn off tracing when we dump.
9893 	 * We don't need to show trace output of what happens
9894 	 * between multiple crashes.
9895 	 *
9896 	 * If the user does a sysrq-z, then they can re-enable
9897 	 * tracing with echo 1 > tracing_on.
9898 	 */
9899 	tracing_off();
9900 
9901 	local_irq_save(flags);
9902 
9903 	/* Simulate the iterator */
9904 	trace_init_global_iter(&iter);
9905 
9906 	for_each_tracing_cpu(cpu) {
9907 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9908 	}
9909 
9910 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9911 
9912 	/* don't look at user memory in panic mode */
9913 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9914 
9915 	switch (oops_dump_mode) {
9916 	case DUMP_ALL:
9917 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9918 		break;
9919 	case DUMP_ORIG:
9920 		iter.cpu_file = raw_smp_processor_id();
9921 		break;
9922 	case DUMP_NONE:
9923 		goto out_enable;
9924 	default:
9925 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9926 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9927 	}
9928 
9929 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9930 
9931 	/* Did function tracer already get disabled? */
9932 	if (ftrace_is_dead()) {
9933 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9934 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9935 	}
9936 
9937 	/*
9938 	 * We need to stop all tracing on all CPUs to read
9939 	 * the next buffer. This is a bit expensive, but it is
9940 	 * not done often. We fill all that we can read,
9941 	 * and then release the locks again.
9942 	 */
9943 
9944 	while (!trace_empty(&iter)) {
9945 
9946 		if (!cnt)
9947 			printk(KERN_TRACE "---------------------------------\n");
9948 
9949 		cnt++;
9950 
9951 		trace_iterator_reset(&iter);
9952 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9953 
9954 		if (trace_find_next_entry_inc(&iter) != NULL) {
9955 			int ret;
9956 
9957 			ret = print_trace_line(&iter);
9958 			if (ret != TRACE_TYPE_NO_CONSUME)
9959 				trace_consume(&iter);
9960 		}
9961 		touch_nmi_watchdog();
9962 
9963 		trace_printk_seq(&iter.seq);
9964 	}
9965 
9966 	if (!cnt)
9967 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9968 	else
9969 		printk(KERN_TRACE "---------------------------------\n");
9970 
9971  out_enable:
9972 	tr->trace_flags |= old_userobj;
9973 
9974 	for_each_tracing_cpu(cpu) {
9975 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9976 	}
9977 	atomic_dec(&dump_running);
9978 	local_irq_restore(flags);
9979 }
9980 EXPORT_SYMBOL_GPL(ftrace_dump);
9981 
9982 #define WRITE_BUFSIZE  4096
9983 
9984 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9985 				size_t count, loff_t *ppos,
9986 				int (*createfn)(const char *))
9987 {
9988 	char *kbuf, *buf, *tmp;
9989 	int ret = 0;
9990 	size_t done = 0;
9991 	size_t size;
9992 
9993 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9994 	if (!kbuf)
9995 		return -ENOMEM;
9996 
9997 	while (done < count) {
9998 		size = count - done;
9999 
10000 		if (size >= WRITE_BUFSIZE)
10001 			size = WRITE_BUFSIZE - 1;
10002 
10003 		if (copy_from_user(kbuf, buffer + done, size)) {
10004 			ret = -EFAULT;
10005 			goto out;
10006 		}
10007 		kbuf[size] = '\0';
10008 		buf = kbuf;
10009 		do {
10010 			tmp = strchr(buf, '\n');
10011 			if (tmp) {
10012 				*tmp = '\0';
10013 				size = tmp - buf + 1;
10014 			} else {
10015 				size = strlen(buf);
10016 				if (done + size < count) {
10017 					if (buf != kbuf)
10018 						break;
10019 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10020 					pr_warn("Line length is too long: Should be less than %d\n",
10021 						WRITE_BUFSIZE - 2);
10022 					ret = -EINVAL;
10023 					goto out;
10024 				}
10025 			}
10026 			done += size;
10027 
10028 			/* Remove comments */
10029 			tmp = strchr(buf, '#');
10030 
10031 			if (tmp)
10032 				*tmp = '\0';
10033 
10034 			ret = createfn(buf);
10035 			if (ret)
10036 				goto out;
10037 			buf += size;
10038 
10039 		} while (done < count);
10040 	}
10041 	ret = done;
10042 
10043 out:
10044 	kfree(kbuf);
10045 
10046 	return ret;
10047 }
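/*
 * trace_parse_run_command() splits the user buffer into newline terminated
 * chunks, cuts off anything after a '#', and hands each resulting line to
 * @createfn, stopping at the first error.  For example, a single write of
 *
 *   "cmd_one\ncmd_two   # trailing comment\n"
 *
 * results in createfn("cmd_one") followed by createfn("cmd_two   ") (the
 * comment is stripped, trailing whitespace is left for the callback).  The
 * command syntax itself is defined entirely by each caller's createfn.
 */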
10048 
10049 __init static int tracer_alloc_buffers(void)
10050 {
10051 	int ring_buf_size;
10052 	int ret = -ENOMEM;
10053 
10054 
10055 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10056 		pr_warn("Tracing disabled due to lockdown\n");
10057 		return -EPERM;
10058 	}
10059 
10060 	/*
10061 	 * Make sure we don't accidentally add more trace options
10062 	 * than we have bits for.
10063 	 */
10064 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10065 
10066 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10067 		goto out;
10068 
10069 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10070 		goto out_free_buffer_mask;
10071 
10072 	/* Only allocate trace_printk buffers if a trace_printk exists */
10073 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10074 		/* Must be called before global_trace.buffer is allocated */
10075 		trace_printk_init_buffers();
10076 
10077 	/* To save memory, keep the ring buffer size to its minimum */
10078 	if (ring_buffer_expanded)
10079 		ring_buf_size = trace_buf_size;
10080 	else
10081 		ring_buf_size = 1;
10082 
10083 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10084 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10085 
10086 	raw_spin_lock_init(&global_trace.start_lock);
10087 
10088 	/*
10089 	 * The prepare callback allocates some memory for the ring buffer. We
10090 	 * don't free the buffer if the CPU goes down. If we were to free
10091 	 * the buffer, then the user would lose any trace that was in the
10092 	 * buffer. The memory will be removed once the "instance" is removed.
10093 	 */
10094 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10095 				      "trace/RB:preapre", trace_rb_cpu_prepare,
10096 				      NULL);
10097 	if (ret < 0)
10098 		goto out_free_cpumask;
10099 	/* Used for event triggers */
10100 	ret = -ENOMEM;
10101 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10102 	if (!temp_buffer)
10103 		goto out_rm_hp_state;
10104 
10105 	if (trace_create_savedcmd() < 0)
10106 		goto out_free_temp_buffer;
10107 
10108 	/* TODO: make the number of buffers hot pluggable with CPUs */
10109 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10110 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10111 		goto out_free_savedcmd;
10112 	}
10113 
10114 	if (global_trace.buffer_disabled)
10115 		tracing_off();
10116 
10117 	if (trace_boot_clock) {
10118 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10119 		if (ret < 0)
10120 			pr_warn("Trace clock %s not defined, going back to default\n",
10121 				trace_boot_clock);
10122 	}
10123 
10124 	/*
10125 	 * register_tracer() might reference current_trace, so it
10126 	 * needs to be set before we register anything. This is
10127 	 * just a bootstrap of current_trace anyway.
10128 	 */
10129 	global_trace.current_trace = &nop_trace;
10130 
10131 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10132 
10133 	ftrace_init_global_array_ops(&global_trace);
10134 
10135 	init_trace_flags_index(&global_trace);
10136 
10137 	register_tracer(&nop_trace);
10138 
10139 	/* Function tracing may start here (via kernel command line) */
10140 	init_function_trace();
10141 
10142 	/* All seems OK, enable tracing */
10143 	tracing_disabled = 0;
10144 
10145 	atomic_notifier_chain_register(&panic_notifier_list,
10146 				       &trace_panic_notifier);
10147 
10148 	register_die_notifier(&trace_die_notifier);
10149 
10150 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10151 
10152 	INIT_LIST_HEAD(&global_trace.systems);
10153 	INIT_LIST_HEAD(&global_trace.events);
10154 	INIT_LIST_HEAD(&global_trace.hist_vars);
10155 	INIT_LIST_HEAD(&global_trace.err_log);
10156 	list_add(&global_trace.list, &ftrace_trace_arrays);
10157 
10158 	apply_trace_boot_options();
10159 
10160 	register_snapshot_cmd();
10161 
10162 	test_can_verify();
10163 
10164 	return 0;
10165 
10166 out_free_savedcmd:
10167 	free_saved_cmdlines_buffer(savedcmd);
10168 out_free_temp_buffer:
10169 	ring_buffer_free(temp_buffer);
10170 out_rm_hp_state:
10171 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10172 out_free_cpumask:
10173 	free_cpumask_var(global_trace.tracing_cpumask);
10174 out_free_buffer_mask:
10175 	free_cpumask_var(tracing_buffer_mask);
10176 out:
10177 	return ret;
10178 }
10179 
10180 void __init ftrace_boot_snapshot(void)
10181 {
10182 	if (snapshot_at_boot) {
10183 		tracing_snapshot();
10184 		internal_trace_puts("** Boot snapshot taken **\n");
10185 	}
10186 }
10187 
10188 void __init early_trace_init(void)
10189 {
10190 	if (tracepoint_printk) {
10191 		tracepoint_print_iter =
10192 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10193 		if (MEM_FAIL(!tracepoint_print_iter,
10194 			     "Failed to allocate trace iterator\n"))
10195 			tracepoint_printk = 0;
10196 		else
10197 			static_key_enable(&tracepoint_printk_key.key);
10198 	}
10199 	tracer_alloc_buffers();
10200 }
10201 
10202 void __init trace_init(void)
10203 {
10204 	trace_event_init();
10205 }
10206 
10207 __init static void clear_boot_tracer(void)
10208 {
10209 	/*
10210 	 * The default bootup tracer string lives in an init section
10211 	 * buffer. This function is called in lateinit. If the boot
10212 	 * tracer was not found by then, clear the pointer out to
10213 	 * prevent a later registration from accessing the buffer
10214 	 * that is about to be freed.
10215 	 */
10216 	if (!default_bootup_tracer)
10217 		return;
10218 
10219 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10220 	       default_bootup_tracer);
10221 	default_bootup_tracer = NULL;
10222 }
10223 
10224 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10225 __init static void tracing_set_default_clock(void)
10226 {
10227 	/* sched_clock_stable() is determined in late_initcall */
10228 	if (!trace_boot_clock && !sched_clock_stable()) {
10229 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10230 			pr_warn("Can not set tracing clock due to lockdown\n");
10231 			return;
10232 		}
10233 
10234 		printk(KERN_WARNING
10235 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10236 		       "If you want to keep using the local clock, then add:\n"
10237 		       "  \"trace_clock=local\"\n"
10238 		       "on the kernel command line\n");
10239 		tracing_set_clock(&global_trace, "global");
10240 	}
10241 }
10242 #else
10243 static inline void tracing_set_default_clock(void) { }
10244 #endif
10245 
10246 __init static int late_trace_init(void)
10247 {
10248 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10249 		static_key_disable(&tracepoint_printk_key.key);
10250 		tracepoint_printk = 0;
10251 	}
10252 
10253 	tracing_set_default_clock();
10254 	clear_boot_tracer();
10255 	return 0;
10256 }
10257 
10258 late_initcall_sync(late_trace_init);
10259