1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 
42 #include "trace.h"
43 #include "trace_output.h"
44 
45 /*
46  * On boot up, the ring buffer is set to the minimum size, so that
47  * we do not waste memory on systems that are not using tracing.
48  */
49 int ring_buffer_expanded;
50 
51 /*
52  * We need to change this state when a selftest is running.
53  * A selftest will look into the ring buffer to count the
54  * entries inserted during the selftest, although concurrent
55  * insertions into the ring buffer, such as trace_printk, could
56  * occur at the same time, giving false positive or negative results.
57  */
58 static bool __read_mostly tracing_selftest_running;
59 
60 /*
61  * If a tracer is running, we do not want to run SELFTEST.
62  */
63 bool __read_mostly tracing_selftest_disabled;
64 
65 /* For tracers that don't implement custom flags */
66 static struct tracer_opt dummy_tracer_opt[] = {
67 	{ }
68 };
69 
70 static struct tracer_flags dummy_tracer_flags = {
71 	.val = 0,
72 	.opts = dummy_tracer_opt
73 };
74 
75 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
76 {
77 	return 0;
78 }
79 
80 /*
81  * Kill all tracing for good (never come back).
82  * It is initialized to 1 but will be set to zero if the initialization
83  * of the tracer is successful. That is the only place that sets
84  * it back to zero.
85  */
86 static int tracing_disabled = 1;
87 
88 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
89 
90 static inline void ftrace_disable_cpu(void)
91 {
92 	preempt_disable();
93 	__this_cpu_inc(ftrace_cpu_disabled);
94 }
95 
96 static inline void ftrace_enable_cpu(void)
97 {
98 	__this_cpu_dec(ftrace_cpu_disabled);
99 	preempt_enable();
100 }
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 static int tracing_set_tracer(const char *buf);
123 
124 #define MAX_TRACER_SIZE		100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127 
128 static int __init set_cmdline_ftrace(char *str)
129 {
130 	strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
131 	default_bootup_tracer = bootup_tracer_buf;
132 	/* We are using ftrace early, expand it */
133 	ring_buffer_expanded = 1;
134 	return 1;
135 }
136 __setup("ftrace=", set_cmdline_ftrace);
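
/*
 * Illustrative usage (not part of the build): a tracer can be started from
 * the boot command line, assuming the named tracer is compiled in, e.g.:
 *
 *	ftrace=function
 *
 * The name is stored in bootup_tracer_buf; register_tracer() later matches
 * it against default_bootup_tracer and starts that tracer.
 */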
137 
138 static int __init set_ftrace_dump_on_oops(char *str)
139 {
140 	if (*str++ != '=' || !*str) {
141 		ftrace_dump_on_oops = DUMP_ALL;
142 		return 1;
143 	}
144 
145 	if (!strcmp("orig_cpu", str)) {
146 		ftrace_dump_on_oops = DUMP_ORIG;
147 		return 1;
148 	}
149 
150 	return 0;
151 }
152 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
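
/*
 * Illustrative usage (not part of the build): the parser above accepts the
 * bare option or an "=orig_cpu" suffix, e.g.:
 *
 *	ftrace_dump_on_oops		-> DUMP_ALL  (dump every CPU buffer)
 *	ftrace_dump_on_oops=orig_cpu	-> DUMP_ORIG (dump only the oopsing CPU)
 *
 * The same modes can be selected at run time by writing 1 or 2 to
 * /proc/sys/kernel/ftrace_dump_on_oops.
 */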
153 
154 unsigned long long ns2usecs(cycle_t nsec)
155 {
156 	nsec += 500;
157 	do_div(nsec, 1000);
158 	return nsec;
159 }
160 
161 /*
162  * The global_trace is the descriptor that holds the tracing
163  * buffers for the live tracing. For each CPU, it contains
164  * a linked list of pages that will store trace entries. The
165  * page descriptor of each page in memory is used to hold the
166  * linked list by linking the lru item in the page descriptor
167  * to each of the pages in the buffer per CPU.
168  *
169  * For each active CPU there is a data field that holds the
170  * pages for the buffer for that CPU. Each CPU has the same number
171  * of pages allocated for its buffer.
172  */
173 static struct trace_array	global_trace;
174 
175 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
176 
177 int filter_current_check_discard(struct ring_buffer *buffer,
178 				 struct ftrace_event_call *call, void *rec,
179 				 struct ring_buffer_event *event)
180 {
181 	return filter_check_discard(call, rec, buffer, event);
182 }
183 EXPORT_SYMBOL_GPL(filter_current_check_discard);
184 
185 cycle_t ftrace_now(int cpu)
186 {
187 	u64 ts;
188 
189 	/* Early boot up does not have a buffer yet */
190 	if (!global_trace.buffer)
191 		return trace_clock_local();
192 
193 	ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
194 	ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
195 
196 	return ts;
197 }
198 
199 /*
200  * The max_tr is used to snapshot the global_trace when a maximum
201  * latency is reached. Some tracers will use this to store a maximum
202  * trace while it continues examining live traces.
203  *
204  * The buffers for the max_tr are set up the same as the global_trace.
205  * When a snapshot is taken, the linked list of the max_tr is swapped
206  * with the linked list of the global_trace and the buffers are reset for
207  * the global_trace so the tracing can continue.
208  */
209 static struct trace_array	max_tr;
210 
211 static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
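
/*
 * Illustrative note: the latency tracers (irqsoff and wakeup, for example)
 * set ->use_max_tr and call update_max_tr()/update_max_tr_single() below
 * when they observe a new maximum, so the worst-case trace is preserved in
 * max_tr while global_trace keeps recording.
 */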
212 
213 /* tracer_enabled is used to toggle activation of a tracer */
214 static int			tracer_enabled = 1;
215 
216 /**
217  * tracing_is_enabled - return tracer_enabled status
218  *
219  * This function is used by other tracers to know the status
220  * of the tracer_enabled flag.  Tracers may use this function
221  * to know whether they should enable their features when starting
222  * up. See irqsoff tracer for an example (start_irqsoff_tracer).
223  */
224 int tracing_is_enabled(void)
225 {
226 	return tracer_enabled;
227 }
228 
229 /*
230  * trace_buf_size is the size in bytes that is allocated
231  * for a buffer. Note, the number of bytes is always rounded
232  * to page size.
233  *
234  * This number is purposely set to a low number of 16384.
235  * If a dump on oops happens, it is much appreciated not to
236  * have to wait for all that output. Anyway, this is
237  * configurable at both boot time and run time.
238  */
239 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
240 
241 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
242 
243 /* trace_types holds a linked list of available tracers. */
244 static struct tracer		*trace_types __read_mostly;
245 
246 /* current_trace points to the tracer that is currently active */
247 static struct tracer		*current_trace __read_mostly;
248 
249 /*
250  * trace_types_lock is used to protect the trace_types list.
251  */
252 static DEFINE_MUTEX(trace_types_lock);
253 
254 /*
255  * serialize the access of the ring buffer
256  *
257  * ring buffer serializes readers, but that is only low level protection.
258  * The validity of the events (which are returned by ring_buffer_peek() etc.)
259  * is not protected by the ring buffer.
260  *
261  * The content of events may become garbage if we allow another process to
262  * consume these events concurrently:
263  *   A) the page of the consumed events may become a normal page
264  *      (not a reader page) in the ring buffer, and this page will be
265  *      rewritten by the events producer.
266  *   B) The page of the consumed events may become a page for splice_read,
267  *      and this page will be returned to the system.
268  *
269  * These primitives allow multiple processes to access different cpu ring
270  * buffers concurrently.
271  *
272  * These primitives don't distinguish read-only and read-consume access.
273  * Multiple read-only accesses are also serialized.
274  */
275 
276 #ifdef CONFIG_SMP
277 static DECLARE_RWSEM(all_cpu_access_lock);
278 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
279 
280 static inline void trace_access_lock(int cpu)
281 {
282 	if (cpu == TRACE_PIPE_ALL_CPU) {
283 		/* gain it for accessing the whole ring buffer. */
284 		down_write(&all_cpu_access_lock);
285 	} else {
286 		/* gain it for accessing a cpu ring buffer. */
287 
288 		/* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
289 		down_read(&all_cpu_access_lock);
290 
291 		/* Secondly block other access to this @cpu ring buffer. */
292 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
293 	}
294 }
295 
296 static inline void trace_access_unlock(int cpu)
297 {
298 	if (cpu == TRACE_PIPE_ALL_CPU) {
299 		up_write(&all_cpu_access_lock);
300 	} else {
301 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
302 		up_read(&all_cpu_access_lock);
303 	}
304 }
305 
306 static inline void trace_access_lock_init(void)
307 {
308 	int cpu;
309 
310 	for_each_possible_cpu(cpu)
311 		mutex_init(&per_cpu(cpu_access_lock, cpu));
312 }
313 
314 #else
315 
316 static DEFINE_MUTEX(access_lock);
317 
318 static inline void trace_access_lock(int cpu)
319 {
320 	(void)cpu;
321 	mutex_lock(&access_lock);
322 }
323 
324 static inline void trace_access_unlock(int cpu)
325 {
326 	(void)cpu;
327 	mutex_unlock(&access_lock);
328 }
329 
330 static inline void trace_access_lock_init(void)
331 {
332 }
333 
334 #endif
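
/*
 * A minimal usage sketch (illustrative only; real callers are s_start()
 * and the trace_pipe/splice readers later in this file):
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events for that cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 *
 * On SMP, TRACE_PIPE_ALL_CPU takes all_cpu_access_lock exclusively, while a
 * specific cpu takes it shared plus that cpu's own mutex.
 */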
335 
336 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
337 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
338 
339 /* trace_flags holds trace_options default values */
340 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
341 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
342 	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
343 	TRACE_ITER_IRQ_INFO;
344 
345 static int trace_stop_count;
346 static DEFINE_RAW_SPINLOCK(tracing_start_lock);
347 
348 static void wakeup_work_handler(struct work_struct *work)
349 {
350 	wake_up(&trace_wait);
351 }
352 
353 static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
354 
355 /**
356  * tracing_on - enable tracing buffers
357  *
358  * This function enables tracing buffers that may have been
359  * disabled with tracing_off.
360  */
361 void tracing_on(void)
362 {
363 	if (global_trace.buffer)
364 		ring_buffer_record_on(global_trace.buffer);
365 	/*
366 	 * This flag is only looked at when buffers haven't been
367 	 * allocated yet. We don't really care about the race
368 	 * between setting this flag and actually turning
369 	 * on the buffer.
370 	 */
371 	global_trace.buffer_disabled = 0;
372 }
373 EXPORT_SYMBOL_GPL(tracing_on);
374 
375 /**
376  * tracing_off - turn off tracing buffers
377  *
378  * This function stops the tracing buffers from recording data.
379  * It does not disable any overhead the tracers themselves may
380  * be causing. This function simply causes all recording to
381  * the ring buffers to fail.
382  */
383 void tracing_off(void)
384 {
385 	if (global_trace.buffer)
386 		ring_buffer_record_off(global_trace.buffer);
387 	/*
388 	 * This flag is only looked at when buffers haven't been
389 	 * allocated yet. We don't really care about the race
390 	 * between setting this flag and actually turning
391 	 * off the buffer.
392 	 */
393 	global_trace.buffer_disabled = 1;
394 }
395 EXPORT_SYMBOL_GPL(tracing_off);
396 
397 /**
398  * tracing_is_on - show state of ring buffers enabled
399  */
400 int tracing_is_on(void)
401 {
402 	if (global_trace.buffer)
403 		return ring_buffer_record_is_on(global_trace.buffer);
404 	return !global_trace.buffer_disabled;
405 }
406 EXPORT_SYMBOL_GPL(tracing_is_on);
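
/*
 * Note (illustrative): the same switch is exposed to user space as the
 * "tracing_on" file in the tracing debugfs directory, e.g.:
 *
 *	echo 0 > /sys/kernel/debug/tracing/tracing_on
 *
 * which ends up in the same ring_buffer_record_on/off paths used above.
 */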
407 
408 /**
409  * trace_wake_up - wake up tasks waiting for trace input
410  *
411  * Schedules a delayed work to wake up any task that is blocked on the
412  * trace_wait queue. This is used with trace_poll for tasks polling the
413  * trace.
414  */
415 void trace_wake_up(void)
416 {
417 	const unsigned long delay = msecs_to_jiffies(2);
418 
419 	if (trace_flags & TRACE_ITER_BLOCK)
420 		return;
421 	schedule_delayed_work(&wakeup_work, delay);
422 }
423 
424 static int __init set_buf_size(char *str)
425 {
426 	unsigned long buf_size;
427 
428 	if (!str)
429 		return 0;
430 	buf_size = memparse(str, &str);
431 	/* nr_entries can not be zero */
432 	if (buf_size == 0)
433 		return 0;
434 	trace_buf_size = buf_size;
435 	return 1;
436 }
437 __setup("trace_buf_size=", set_buf_size);
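
/*
 * Illustrative usage (not part of the build): memparse() understands the
 * usual K/M/G suffixes, so the per-cpu buffer size can be set at boot with:
 *
 *	trace_buf_size=1M
 *
 * The value is rounded to page size when the ring buffer is allocated.
 */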
438 
439 static int __init set_tracing_thresh(char *str)
440 {
441 	unsigned long threshold;
442 	int ret;
443 
444 	if (!str)
445 		return 0;
446 	ret = strict_strtoul(str, 0, &threshold);
447 	if (ret < 0)
448 		return 0;
449 	tracing_thresh = threshold * 1000;
450 	return 1;
451 }
452 __setup("tracing_thresh=", set_tracing_thresh);
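
/*
 * Illustrative usage (not part of the build): tracing_thresh is kept in
 * nanoseconds, but the boot parameter is given in microseconds, e.g.:
 *
 *	tracing_thresh=100
 *
 * asks the latency tracers to record only latencies above 100 usecs.
 */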
453 
454 unsigned long nsecs_to_usecs(unsigned long nsecs)
455 {
456 	return nsecs / 1000;
457 }
458 
459 /* These must match the bit positions in trace_iterator_flags */
460 static const char *trace_options[] = {
461 	"print-parent",
462 	"sym-offset",
463 	"sym-addr",
464 	"verbose",
465 	"raw",
466 	"hex",
467 	"bin",
468 	"block",
469 	"stacktrace",
470 	"trace_printk",
471 	"ftrace_preempt",
472 	"branch",
473 	"annotate",
474 	"userstacktrace",
475 	"sym-userobj",
476 	"printk-msg-only",
477 	"context-info",
478 	"latency-format",
479 	"sleep-time",
480 	"graph-time",
481 	"record-cmd",
482 	"overwrite",
483 	"disable_on_free",
484 	"irq-info",
485 	NULL
486 };
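
/*
 * Illustrative usage (not part of the build): each string above becomes a
 * toggle under the tracing debugfs directory, e.g.:
 *
 *	echo 1 > /sys/kernel/debug/tracing/options/stacktrace
 *	echo nostacktrace > /sys/kernel/debug/tracing/trace_options
 *
 * Both forms flip the corresponding TRACE_ITER_* bit in trace_flags.
 */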
487 
488 static struct {
489 	u64 (*func)(void);
490 	const char *name;
491 } trace_clocks[] = {
492 	{ trace_clock_local,	"local" },
493 	{ trace_clock_global,	"global" },
494 	{ trace_clock_counter,	"counter" },
495 };
496 
497 int trace_clock_id;
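
/*
 * Illustrative usage (not part of the build): the clocks above are selected
 * through the "trace_clock" debugfs file, e.g.:
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * trace_clock_id holds the index of the currently active entry.
 */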
498 
499 /*
500  * trace_parser_get_init - gets the buffer for trace parser
501  */
502 int trace_parser_get_init(struct trace_parser *parser, int size)
503 {
504 	memset(parser, 0, sizeof(*parser));
505 
506 	parser->buffer = kmalloc(size, GFP_KERNEL);
507 	if (!parser->buffer)
508 		return 1;
509 
510 	parser->size = size;
511 	return 0;
512 }
513 
514 /*
515  * trace_parser_put - frees the buffer for trace parser
516  */
517 void trace_parser_put(struct trace_parser *parser)
518 {
519 	kfree(parser->buffer);
520 }
521 
522 /*
523  * trace_get_user - reads the user input string separated by space
524  * (matched by isspace(ch))
525  *
526  * For each string found the 'struct trace_parser' is updated,
527  * and the function returns.
528  *
529  * Returns number of bytes read.
530  *
531  * See kernel/trace/trace.h for 'struct trace_parser' details.
532  */
533 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
534 	size_t cnt, loff_t *ppos)
535 {
536 	char ch;
537 	size_t read = 0;
538 	ssize_t ret;
539 
540 	if (!*ppos)
541 		trace_parser_clear(parser);
542 
543 	ret = get_user(ch, ubuf++);
544 	if (ret)
545 		goto out;
546 
547 	read++;
548 	cnt--;
549 
550 	/*
551 	 * The parser is not finished with the last write,
552 	 * continue reading the user input without skipping spaces.
553 	 */
554 	if (!parser->cont) {
555 		/* skip white space */
556 		while (cnt && isspace(ch)) {
557 			ret = get_user(ch, ubuf++);
558 			if (ret)
559 				goto out;
560 			read++;
561 			cnt--;
562 		}
563 
564 		/* only spaces were written */
565 		if (isspace(ch)) {
566 			*ppos += read;
567 			ret = read;
568 			goto out;
569 		}
570 
571 		parser->idx = 0;
572 	}
573 
574 	/* read the non-space input */
575 	while (cnt && !isspace(ch)) {
576 		if (parser->idx < parser->size - 1)
577 			parser->buffer[parser->idx++] = ch;
578 		else {
579 			ret = -EINVAL;
580 			goto out;
581 		}
582 		ret = get_user(ch, ubuf++);
583 		if (ret)
584 			goto out;
585 		read++;
586 		cnt--;
587 	}
588 
589 	/* We either got finished input or we have to wait for another call. */
590 	if (isspace(ch)) {
591 		parser->buffer[parser->idx] = 0;
592 		parser->cont = false;
593 	} else if (parser->idx < parser->size - 1) {
594 		parser->cont = true;
595 		parser->buffer[parser->idx++] = ch;
596 	} else {
597 		ret = -EINVAL;
598 		goto out;
599 	}
600 
601 	*ppos += read;
602 	ret = read;
603 
604 out:
605 	return ret;
606 }
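
/*
 * A minimal usage sketch of the parser API above (illustrative only; the
 * real users are write() handlers such as the event and filter files):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser)) {
 *		handle_token(parser.buffer);	(hypothetical helper)
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */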
607 
608 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
609 {
610 	int len;
611 	int ret;
612 
613 	if (!cnt)
614 		return 0;
615 
616 	if (s->len <= s->readpos)
617 		return -EBUSY;
618 
619 	len = s->len - s->readpos;
620 	if (cnt > len)
621 		cnt = len;
622 	ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
623 	if (ret == cnt)
624 		return -EFAULT;
625 
626 	cnt -= ret;
627 
628 	s->readpos += cnt;
629 	return cnt;
630 }
631 
632 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
633 {
634 	int len;
635 	void *ret;
636 
637 	if (s->len <= s->readpos)
638 		return -EBUSY;
639 
640 	len = s->len - s->readpos;
641 	if (cnt > len)
642 		cnt = len;
643 	ret = memcpy(buf, s->buffer + s->readpos, cnt);
644 	if (!ret)
645 		return -EFAULT;
646 
647 	s->readpos += cnt;
648 	return cnt;
649 }
650 
651 /*
652  * ftrace_max_lock is used to protect the swapping of buffers
653  * when taking a max snapshot. The buffers themselves are
654  * protected by per_cpu spinlocks. But the action of the swap
655  * needs its own lock.
656  *
657  * This is defined as a arch_spinlock_t in order to help
658  * with performance when lockdep debugging is enabled.
659  *
660  * It is also used in other places outside the update_max_tr
661  * so it needs to be defined outside of the
662  * CONFIG_TRACER_MAX_TRACE.
663  */
664 static arch_spinlock_t ftrace_max_lock =
665 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
666 
667 unsigned long __read_mostly	tracing_thresh;
668 
669 #ifdef CONFIG_TRACER_MAX_TRACE
670 unsigned long __read_mostly	tracing_max_latency;
671 
672 /*
673  * Copy the new maximum trace into the separate maximum-trace
674  * structure. (this way the maximum trace is permanently saved,
675  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
676  */
677 static void
678 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
679 {
680 	struct trace_array_cpu *data = tr->data[cpu];
681 	struct trace_array_cpu *max_data;
682 
683 	max_tr.cpu = cpu;
684 	max_tr.time_start = data->preempt_timestamp;
685 
686 	max_data = max_tr.data[cpu];
687 	max_data->saved_latency = tracing_max_latency;
688 	max_data->critical_start = data->critical_start;
689 	max_data->critical_end = data->critical_end;
690 
691 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
692 	max_data->pid = tsk->pid;
693 	/*
694 	 * If tsk == current, then use current_uid(), as that does not use
695 	 * RCU. The irq tracer can be called out of RCU scope.
696 	 */
697 	if (tsk == current)
698 		max_data->uid = current_uid();
699 	else
700 		max_data->uid = task_uid(tsk);
701 
702 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
703 	max_data->policy = tsk->policy;
704 	max_data->rt_priority = tsk->rt_priority;
705 
706 	/* record this tasks comm */
707 	tracing_record_cmdline(tsk);
708 }
709 
710 /**
711  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
712  * @tr: tracer
713  * @tsk: the task with the latency
714  * @cpu: The cpu that initiated the trace.
715  *
716  * Flip the buffers between the @tr and the max_tr and record information
717  * about which task was the cause of this latency.
718  */
719 void
720 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
721 {
722 	struct ring_buffer *buf;
723 
724 	if (trace_stop_count)
725 		return;
726 
727 	WARN_ON_ONCE(!irqs_disabled());
728 	if (!current_trace->use_max_tr) {
729 		WARN_ON_ONCE(1);
730 		return;
731 	}
732 	arch_spin_lock(&ftrace_max_lock);
733 
734 	buf = tr->buffer;
735 	tr->buffer = max_tr.buffer;
736 	max_tr.buffer = buf;
737 
738 	__update_max_tr(tr, tsk, cpu);
739 	arch_spin_unlock(&ftrace_max_lock);
740 }
741 
742 /**
743  * update_max_tr_single - only copy one trace over, and reset the rest
744  * @tr: tracer
745  * @tsk: task with the latency
746  * @cpu: the cpu of the buffer to copy.
747  *
748  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
749  */
750 void
751 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
752 {
753 	int ret;
754 
755 	if (trace_stop_count)
756 		return;
757 
758 	WARN_ON_ONCE(!irqs_disabled());
759 	if (!current_trace->use_max_tr) {
760 		WARN_ON_ONCE(1);
761 		return;
762 	}
763 
764 	arch_spin_lock(&ftrace_max_lock);
765 
766 	ftrace_disable_cpu();
767 
768 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
769 
770 	if (ret == -EBUSY) {
771 		/*
772 		 * We failed to swap the buffer due to a commit taking
773 		 * place on this CPU. We fail to record, but we reset
774 		 * the max trace buffer (no one writes directly to it)
775 		 * and flag that it failed.
776 		 */
777 		trace_array_printk(&max_tr, _THIS_IP_,
778 			"Failed to swap buffers due to commit in progress\n");
779 	}
780 
781 	ftrace_enable_cpu();
782 
783 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
784 
785 	__update_max_tr(tr, tsk, cpu);
786 	arch_spin_unlock(&ftrace_max_lock);
787 }
788 #endif /* CONFIG_TRACER_MAX_TRACE */
789 
790 /**
791  * register_tracer - register a tracer with the ftrace system.
792  * @type: the plugin for the tracer
793  *
794  * Register a new plugin tracer.
795  */
796 int register_tracer(struct tracer *type)
797 __releases(kernel_lock)
798 __acquires(kernel_lock)
799 {
800 	struct tracer *t;
801 	int ret = 0;
802 
803 	if (!type->name) {
804 		pr_info("Tracer must have a name\n");
805 		return -1;
806 	}
807 
808 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
809 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
810 		return -1;
811 	}
812 
813 	mutex_lock(&trace_types_lock);
814 
815 	tracing_selftest_running = true;
816 
817 	for (t = trace_types; t; t = t->next) {
818 		if (strcmp(type->name, t->name) == 0) {
819 			/* already found */
820 			pr_info("Tracer %s already registered\n",
821 				type->name);
822 			ret = -1;
823 			goto out;
824 		}
825 	}
826 
827 	if (!type->set_flag)
828 		type->set_flag = &dummy_set_flag;
829 	if (!type->flags)
830 		type->flags = &dummy_tracer_flags;
831 	else
832 		if (!type->flags->opts)
833 			type->flags->opts = dummy_tracer_opt;
834 	if (!type->wait_pipe)
835 		type->wait_pipe = default_wait_pipe;
836 
837 
838 #ifdef CONFIG_FTRACE_STARTUP_TEST
839 	if (type->selftest && !tracing_selftest_disabled) {
840 		struct tracer *saved_tracer = current_trace;
841 		struct trace_array *tr = &global_trace;
842 
843 		/*
844 		 * Run a selftest on this tracer.
845 		 * Here we reset the trace buffer, and set the current
846 		 * tracer to be this tracer. The tracer can then run some
847 		 * internal tracing to verify that everything is in order.
848 		 * If we fail, we do not register this tracer.
849 		 */
850 		tracing_reset_online_cpus(tr);
851 
852 		current_trace = type;
853 
854 		/* If we expanded the buffers, make sure the max is expanded too */
855 		if (ring_buffer_expanded && type->use_max_tr)
856 			ring_buffer_resize(max_tr.buffer, trace_buf_size);
857 
858 		/* the test is responsible for initializing and enabling */
859 		pr_info("Testing tracer %s: ", type->name);
860 		ret = type->selftest(type, tr);
861 		/* the test is responsible for resetting too */
862 		current_trace = saved_tracer;
863 		if (ret) {
864 			printk(KERN_CONT "FAILED!\n");
865 			goto out;
866 		}
867 		/* Only reset on passing, to avoid touching corrupted buffers */
868 		tracing_reset_online_cpus(tr);
869 
870 		/* Shrink the max buffer again */
871 		if (ring_buffer_expanded && type->use_max_tr)
872 			ring_buffer_resize(max_tr.buffer, 1);
873 
874 		printk(KERN_CONT "PASSED\n");
875 	}
876 #endif
877 
878 	type->next = trace_types;
879 	trace_types = type;
880 
881  out:
882 	tracing_selftest_running = false;
883 	mutex_unlock(&trace_types_lock);
884 
885 	if (ret || !default_bootup_tracer)
886 		goto out_unlock;
887 
888 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
889 		goto out_unlock;
890 
891 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
892 	/* Do we want this tracer to start on bootup? */
893 	tracing_set_tracer(type->name);
894 	default_bootup_tracer = NULL;
895 	/* disable other selftests, since this will break them. */
896 	tracing_selftest_disabled = 1;
897 #ifdef CONFIG_FTRACE_STARTUP_TEST
898 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
899 	       type->name);
900 #endif
901 
902  out_unlock:
903 	return ret;
904 }
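
/*
 * A minimal registration sketch (illustrative only, with hypothetical
 * foo_* names; real examples live in the other kernel/trace/trace_*.c
 * files):
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	device_initcall(init_foo_tracer);
 */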
905 
906 void unregister_tracer(struct tracer *type)
907 {
908 	struct tracer **t;
909 
910 	mutex_lock(&trace_types_lock);
911 	for (t = &trace_types; *t; t = &(*t)->next) {
912 		if (*t == type)
913 			goto found;
914 	}
915 	pr_info("Tracer %s not registered\n", type->name);
916 	goto out;
917 
918  found:
919 	*t = (*t)->next;
920 
921 	if (type == current_trace && tracer_enabled) {
922 		tracer_enabled = 0;
923 		tracing_stop();
924 		if (current_trace->stop)
925 			current_trace->stop(&global_trace);
926 		current_trace = &nop_trace;
927 	}
928 out:
929 	mutex_unlock(&trace_types_lock);
930 }
931 
932 static void __tracing_reset(struct ring_buffer *buffer, int cpu)
933 {
934 	ftrace_disable_cpu();
935 	ring_buffer_reset_cpu(buffer, cpu);
936 	ftrace_enable_cpu();
937 }
938 
939 void tracing_reset(struct trace_array *tr, int cpu)
940 {
941 	struct ring_buffer *buffer = tr->buffer;
942 
943 	ring_buffer_record_disable(buffer);
944 
945 	/* Make sure all commits have finished */
946 	synchronize_sched();
947 	__tracing_reset(buffer, cpu);
948 
949 	ring_buffer_record_enable(buffer);
950 }
951 
952 void tracing_reset_online_cpus(struct trace_array *tr)
953 {
954 	struct ring_buffer *buffer = tr->buffer;
955 	int cpu;
956 
957 	ring_buffer_record_disable(buffer);
958 
959 	/* Make sure all commits have finished */
960 	synchronize_sched();
961 
962 	tr->time_start = ftrace_now(tr->cpu);
963 
964 	for_each_online_cpu(cpu)
965 		__tracing_reset(buffer, cpu);
966 
967 	ring_buffer_record_enable(buffer);
968 }
969 
970 void tracing_reset_current(int cpu)
971 {
972 	tracing_reset(&global_trace, cpu);
973 }
974 
975 void tracing_reset_current_online_cpus(void)
976 {
977 	tracing_reset_online_cpus(&global_trace);
978 }
979 
980 #define SAVED_CMDLINES 128
981 #define NO_CMDLINE_MAP UINT_MAX
982 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
983 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
984 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
985 static int cmdline_idx;
986 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
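
/*
 * The tables above form a small two-way map between pids and saved comms
 * (illustrative example with made-up values): if map_pid_to_cmdline[1234]
 * is 7, then saved_cmdlines[7] holds the comm of pid 1234 and
 * map_cmdline_to_pid[7] is 1234.  Only the last SAVED_CMDLINES tasks are
 * kept; older slots are recycled by trace_save_cmdline() below.
 */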
987 
988 /* temporarily disable recording */
989 static atomic_t trace_record_cmdline_disabled __read_mostly;
990 
991 static void trace_init_cmdlines(void)
992 {
993 	memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
994 	memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
995 	cmdline_idx = 0;
996 }
997 
998 int is_tracing_stopped(void)
999 {
1000 	return trace_stop_count;
1001 }
1002 
1003 /**
1004  * ftrace_off_permanent - disable all ftrace code permanently
1005  *
1006  * This should only be called when a serious anomaly has
1007  * been detected.  This will turn off the function tracing,
1008  * ring buffers, and other tracing utilities. It takes no
1009  * locks and can be called from any context.
1010  */
1011 void ftrace_off_permanent(void)
1012 {
1013 	tracing_disabled = 1;
1014 	ftrace_stop();
1015 	tracing_off_permanent();
1016 }
1017 
1018 /**
1019  * tracing_start - quick start of the tracer
1020  *
1021  * If tracing is enabled but was stopped by tracing_stop,
1022  * this will start the tracer back up.
1023  */
1024 void tracing_start(void)
1025 {
1026 	struct ring_buffer *buffer;
1027 	unsigned long flags;
1028 
1029 	if (tracing_disabled)
1030 		return;
1031 
1032 	raw_spin_lock_irqsave(&tracing_start_lock, flags);
1033 	if (--trace_stop_count) {
1034 		if (trace_stop_count < 0) {
1035 			/* Someone screwed up their debugging */
1036 			WARN_ON_ONCE(1);
1037 			trace_stop_count = 0;
1038 		}
1039 		goto out;
1040 	}
1041 
1042 	/* Prevent the buffers from switching */
1043 	arch_spin_lock(&ftrace_max_lock);
1044 
1045 	buffer = global_trace.buffer;
1046 	if (buffer)
1047 		ring_buffer_record_enable(buffer);
1048 
1049 	buffer = max_tr.buffer;
1050 	if (buffer)
1051 		ring_buffer_record_enable(buffer);
1052 
1053 	arch_spin_unlock(&ftrace_max_lock);
1054 
1055  out:
1056 	raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
1057 }
1058 
1059 /**
1060  * tracing_stop - quick stop of the tracer
1061  *
1062  * Lightweight way to stop tracing. Use in conjunction with
1063  * tracing_start.
1064  */
1065 void tracing_stop(void)
1066 {
1067 	struct ring_buffer *buffer;
1068 	unsigned long flags;
1069 
1070 	raw_spin_lock_irqsave(&tracing_start_lock, flags);
1071 	if (trace_stop_count++)
1072 		goto out;
1073 
1074 	/* Prevent the buffers from switching */
1075 	arch_spin_lock(&ftrace_max_lock);
1076 
1077 	buffer = global_trace.buffer;
1078 	if (buffer)
1079 		ring_buffer_record_disable(buffer);
1080 
1081 	buffer = max_tr.buffer;
1082 	if (buffer)
1083 		ring_buffer_record_disable(buffer);
1084 
1085 	arch_spin_unlock(&ftrace_max_lock);
1086 
1087  out:
1088 	raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
1089 }
1090 
1091 void trace_stop_cmdline_recording(void);
1092 
1093 static void trace_save_cmdline(struct task_struct *tsk)
1094 {
1095 	unsigned pid, idx;
1096 
1097 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1098 		return;
1099 
1100 	/*
1101 	 * It's not the end of the world if we don't get
1102 	 * the lock, but we also don't want to spin
1103 	 * nor do we want to disable interrupts,
1104 	 * so if we miss here, then better luck next time.
1105 	 */
1106 	if (!arch_spin_trylock(&trace_cmdline_lock))
1107 		return;
1108 
1109 	idx = map_pid_to_cmdline[tsk->pid];
1110 	if (idx == NO_CMDLINE_MAP) {
1111 		idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1112 
1113 		/*
1114 		 * Check whether the cmdline buffer at idx has a pid
1115 		 * mapped. We are going to overwrite that entry so we
1116 		 * need to clear the map_pid_to_cmdline. Otherwise we
1117 		 * would read the new comm for the old pid.
1118 		 */
1119 		pid = map_cmdline_to_pid[idx];
1120 		if (pid != NO_CMDLINE_MAP)
1121 			map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1122 
1123 		map_cmdline_to_pid[idx] = tsk->pid;
1124 		map_pid_to_cmdline[tsk->pid] = idx;
1125 
1126 		cmdline_idx = idx;
1127 	}
1128 
1129 	memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1130 
1131 	arch_spin_unlock(&trace_cmdline_lock);
1132 }
1133 
1134 void trace_find_cmdline(int pid, char comm[])
1135 {
1136 	unsigned map;
1137 
1138 	if (!pid) {
1139 		strcpy(comm, "<idle>");
1140 		return;
1141 	}
1142 
1143 	if (WARN_ON_ONCE(pid < 0)) {
1144 		strcpy(comm, "<XXX>");
1145 		return;
1146 	}
1147 
1148 	if (pid > PID_MAX_DEFAULT) {
1149 		strcpy(comm, "<...>");
1150 		return;
1151 	}
1152 
1153 	preempt_disable();
1154 	arch_spin_lock(&trace_cmdline_lock);
1155 	map = map_pid_to_cmdline[pid];
1156 	if (map != NO_CMDLINE_MAP)
1157 		strcpy(comm, saved_cmdlines[map]);
1158 	else
1159 		strcpy(comm, "<...>");
1160 
1161 	arch_spin_unlock(&trace_cmdline_lock);
1162 	preempt_enable();
1163 }
1164 
1165 void tracing_record_cmdline(struct task_struct *tsk)
1166 {
1167 	if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
1168 	    !tracing_is_on())
1169 		return;
1170 
1171 	trace_save_cmdline(tsk);
1172 }
1173 
1174 void
1175 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1176 			     int pc)
1177 {
1178 	struct task_struct *tsk = current;
1179 
1180 	entry->preempt_count		= pc & 0xff;
1181 	entry->pid			= (tsk) ? tsk->pid : 0;
1182 	entry->padding			= 0;
1183 	entry->flags =
1184 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1185 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1186 #else
1187 		TRACE_FLAG_IRQS_NOSUPPORT |
1188 #endif
1189 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1190 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1191 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1192 }
1193 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1194 
1195 struct ring_buffer_event *
1196 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1197 			  int type,
1198 			  unsigned long len,
1199 			  unsigned long flags, int pc)
1200 {
1201 	struct ring_buffer_event *event;
1202 
1203 	event = ring_buffer_lock_reserve(buffer, len);
1204 	if (event != NULL) {
1205 		struct trace_entry *ent = ring_buffer_event_data(event);
1206 
1207 		tracing_generic_entry_update(ent, flags, pc);
1208 		ent->type = type;
1209 	}
1210 
1211 	return event;
1212 }
1213 
1214 static inline void
1215 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1216 			     struct ring_buffer_event *event,
1217 			     unsigned long flags, int pc,
1218 			     int wake)
1219 {
1220 	ring_buffer_unlock_commit(buffer, event);
1221 
1222 	ftrace_trace_stack(buffer, flags, 6, pc);
1223 	ftrace_trace_userstack(buffer, flags, pc);
1224 
1225 	if (wake)
1226 		trace_wake_up();
1227 }
1228 
1229 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1230 				struct ring_buffer_event *event,
1231 				unsigned long flags, int pc)
1232 {
1233 	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1234 }
1235 
1236 struct ring_buffer_event *
1237 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1238 				  int type, unsigned long len,
1239 				  unsigned long flags, int pc)
1240 {
1241 	*current_rb = global_trace.buffer;
1242 	return trace_buffer_lock_reserve(*current_rb,
1243 					 type, len, flags, pc);
1244 }
1245 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1246 
1247 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1248 					struct ring_buffer_event *event,
1249 					unsigned long flags, int pc)
1250 {
1251 	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1252 }
1253 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1254 
1255 void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
1256 				       struct ring_buffer_event *event,
1257 				       unsigned long flags, int pc)
1258 {
1259 	__trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
1260 }
1261 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
1262 
1263 void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1264 					    struct ring_buffer_event *event,
1265 					    unsigned long flags, int pc,
1266 					    struct pt_regs *regs)
1267 {
1268 	ring_buffer_unlock_commit(buffer, event);
1269 
1270 	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1271 	ftrace_trace_userstack(buffer, flags, pc);
1272 }
1273 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
1274 
1275 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1276 					 struct ring_buffer_event *event)
1277 {
1278 	ring_buffer_discard_commit(buffer, event);
1279 }
1280 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1281 
1282 void
1283 trace_function(struct trace_array *tr,
1284 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1285 	       int pc)
1286 {
1287 	struct ftrace_event_call *call = &event_function;
1288 	struct ring_buffer *buffer = tr->buffer;
1289 	struct ring_buffer_event *event;
1290 	struct ftrace_entry *entry;
1291 
1292 	/* If we are reading the ring buffer, don't trace */
1293 	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1294 		return;
1295 
1296 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1297 					  flags, pc);
1298 	if (!event)
1299 		return;
1300 	entry	= ring_buffer_event_data(event);
1301 	entry->ip			= ip;
1302 	entry->parent_ip		= parent_ip;
1303 
1304 	if (!filter_check_discard(call, entry, buffer, event))
1305 		ring_buffer_unlock_commit(buffer, event);
1306 }
1307 
1308 void
1309 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1310        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1311        int pc)
1312 {
1313 	if (likely(!atomic_read(&data->disabled)))
1314 		trace_function(tr, ip, parent_ip, flags, pc);
1315 }
1316 
1317 #ifdef CONFIG_STACKTRACE
1318 
1319 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1320 struct ftrace_stack {
1321 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
1322 };
1323 
1324 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1325 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1326 
1327 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1328 				 unsigned long flags,
1329 				 int skip, int pc, struct pt_regs *regs)
1330 {
1331 	struct ftrace_event_call *call = &event_kernel_stack;
1332 	struct ring_buffer_event *event;
1333 	struct stack_entry *entry;
1334 	struct stack_trace trace;
1335 	int use_stack;
1336 	int size = FTRACE_STACK_ENTRIES;
1337 
1338 	trace.nr_entries	= 0;
1339 	trace.skip		= skip;
1340 
1341 	/*
1342 	 * Since events can happen in NMIs there's no safe way to
1343 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1344 	 * or NMI comes in, it will just have to use the default
1345 	 * FTRACE_STACK_SIZE.
1346 	 */
1347 	preempt_disable_notrace();
1348 
1349 	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
1350 	/*
1351 	 * We don't need any atomic variables, just a barrier.
1352 	 * If an interrupt comes in, we don't care, because it would
1353 	 * have exited and put the counter back to what we want.
1354 	 * We just need a barrier to keep gcc from moving things
1355 	 * around.
1356 	 */
1357 	barrier();
1358 	if (use_stack == 1) {
1359 		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
1360 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
1361 
1362 		if (regs)
1363 			save_stack_trace_regs(regs, &trace);
1364 		else
1365 			save_stack_trace(&trace);
1366 
1367 		if (trace.nr_entries > size)
1368 			size = trace.nr_entries;
1369 	} else
1370 		/* From now on, use_stack is a boolean */
1371 		use_stack = 0;
1372 
1373 	size *= sizeof(unsigned long);
1374 
1375 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1376 					  sizeof(*entry) + size, flags, pc);
1377 	if (!event)
1378 		goto out;
1379 	entry = ring_buffer_event_data(event);
1380 
1381 	memset(&entry->caller, 0, size);
1382 
1383 	if (use_stack)
1384 		memcpy(&entry->caller, trace.entries,
1385 		       trace.nr_entries * sizeof(unsigned long));
1386 	else {
1387 		trace.max_entries	= FTRACE_STACK_ENTRIES;
1388 		trace.entries		= entry->caller;
1389 		if (regs)
1390 			save_stack_trace_regs(regs, &trace);
1391 		else
1392 			save_stack_trace(&trace);
1393 	}
1394 
1395 	entry->size = trace.nr_entries;
1396 
1397 	if (!filter_check_discard(call, entry, buffer, event))
1398 		ring_buffer_unlock_commit(buffer, event);
1399 
1400  out:
1401 	/* Again, don't let gcc optimize things here */
1402 	barrier();
1403 	__get_cpu_var(ftrace_stack_reserve)--;
1404 	preempt_enable_notrace();
1405 
1406 }
1407 
1408 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1409 			     int skip, int pc, struct pt_regs *regs)
1410 {
1411 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1412 		return;
1413 
1414 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1415 }
1416 
1417 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1418 			int skip, int pc)
1419 {
1420 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1421 		return;
1422 
1423 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1424 }
1425 
1426 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1427 		   int pc)
1428 {
1429 	__ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
1430 }
1431 
1432 /**
1433  * trace_dump_stack - record a stack back trace in the trace buffer
1434  */
1435 void trace_dump_stack(void)
1436 {
1437 	unsigned long flags;
1438 
1439 	if (tracing_disabled || tracing_selftest_running)
1440 		return;
1441 
1442 	local_save_flags(flags);
1443 
1444 	/* skipping 3 entries seems to get us to the caller of this function */
1445 	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
1446 }
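
/*
 * Illustrative usage (not part of the build): drop a call at a point of
 * interest to record how it was reached, then read the result back from
 * the "trace" file, e.g.:
 *
 *	if (suspicious_condition)	(hypothetical condition)
 *		trace_dump_stack();
 */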
1447 
1448 static DEFINE_PER_CPU(int, user_stack_count);
1449 
1450 void
1451 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1452 {
1453 	struct ftrace_event_call *call = &event_user_stack;
1454 	struct ring_buffer_event *event;
1455 	struct userstack_entry *entry;
1456 	struct stack_trace trace;
1457 
1458 	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1459 		return;
1460 
1461 	/*
1462 	 * NMIs can not handle page faults, even with fix-ups.
1463 	 * Saving the user stack can (and often does) fault.
1464 	 */
1465 	if (unlikely(in_nmi()))
1466 		return;
1467 
1468 	/*
1469 	 * prevent recursion, since the user stack tracing may
1470 	 * trigger other kernel events.
1471 	 */
1472 	preempt_disable();
1473 	if (__this_cpu_read(user_stack_count))
1474 		goto out;
1475 
1476 	__this_cpu_inc(user_stack_count);
1477 
1478 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1479 					  sizeof(*entry), flags, pc);
1480 	if (!event)
1481 		goto out_drop_count;
1482 	entry	= ring_buffer_event_data(event);
1483 
1484 	entry->tgid		= current->tgid;
1485 	memset(&entry->caller, 0, sizeof(entry->caller));
1486 
1487 	trace.nr_entries	= 0;
1488 	trace.max_entries	= FTRACE_STACK_ENTRIES;
1489 	trace.skip		= 0;
1490 	trace.entries		= entry->caller;
1491 
1492 	save_stack_trace_user(&trace);
1493 	if (!filter_check_discard(call, entry, buffer, event))
1494 		ring_buffer_unlock_commit(buffer, event);
1495 
1496  out_drop_count:
1497 	__this_cpu_dec(user_stack_count);
1498  out:
1499 	preempt_enable();
1500 }
1501 
1502 #ifdef UNUSED
1503 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1504 {
1505 	ftrace_trace_userstack(tr, flags, preempt_count());
1506 }
1507 #endif /* UNUSED */
1508 
1509 #endif /* CONFIG_STACKTRACE */
1510 
1511 /**
1512  * trace_vbprintk - write binary msg to tracing buffer
1513  *
1514  */
1515 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1516 {
1517 	static arch_spinlock_t trace_buf_lock =
1518 		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1519 	static u32 trace_buf[TRACE_BUF_SIZE];
1520 
1521 	struct ftrace_event_call *call = &event_bprint;
1522 	struct ring_buffer_event *event;
1523 	struct ring_buffer *buffer;
1524 	struct trace_array *tr = &global_trace;
1525 	struct trace_array_cpu *data;
1526 	struct bprint_entry *entry;
1527 	unsigned long flags;
1528 	int disable;
1529 	int cpu, len = 0, size, pc;
1530 
1531 	if (unlikely(tracing_selftest_running || tracing_disabled))
1532 		return 0;
1533 
1534 	/* Don't pollute graph traces with trace_vprintk internals */
1535 	pause_graph_tracing();
1536 
1537 	pc = preempt_count();
1538 	preempt_disable_notrace();
1539 	cpu = raw_smp_processor_id();
1540 	data = tr->data[cpu];
1541 
1542 	disable = atomic_inc_return(&data->disabled);
1543 	if (unlikely(disable != 1))
1544 		goto out;
1545 
1546 	/* Lockdep uses trace_printk for lock tracing */
1547 	local_irq_save(flags);
1548 	arch_spin_lock(&trace_buf_lock);
1549 	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1550 
1551 	if (len > TRACE_BUF_SIZE || len < 0)
1552 		goto out_unlock;
1553 
1554 	size = sizeof(*entry) + sizeof(u32) * len;
1555 	buffer = tr->buffer;
1556 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1557 					  flags, pc);
1558 	if (!event)
1559 		goto out_unlock;
1560 	entry = ring_buffer_event_data(event);
1561 	entry->ip			= ip;
1562 	entry->fmt			= fmt;
1563 
1564 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1565 	if (!filter_check_discard(call, entry, buffer, event)) {
1566 		ring_buffer_unlock_commit(buffer, event);
1567 		ftrace_trace_stack(buffer, flags, 6, pc);
1568 	}
1569 
1570 out_unlock:
1571 	arch_spin_unlock(&trace_buf_lock);
1572 	local_irq_restore(flags);
1573 
1574 out:
1575 	atomic_dec_return(&data->disabled);
1576 	preempt_enable_notrace();
1577 	unpause_graph_tracing();
1578 
1579 	return len;
1580 }
1581 EXPORT_SYMBOL_GPL(trace_vbprintk);
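
/*
 * Note (illustrative): kernel code normally reaches trace_vbprintk()
 * through the trace_printk() macro, which records the constant format
 * string at build time and passes only the binary arguments here, e.g.:
 *
 *	trace_printk("reached %s with flags %lx\n", name, flags);
 */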
1582 
1583 int trace_array_printk(struct trace_array *tr,
1584 		       unsigned long ip, const char *fmt, ...)
1585 {
1586 	int ret;
1587 	va_list ap;
1588 
1589 	if (!(trace_flags & TRACE_ITER_PRINTK))
1590 		return 0;
1591 
1592 	va_start(ap, fmt);
1593 	ret = trace_array_vprintk(tr, ip, fmt, ap);
1594 	va_end(ap);
1595 	return ret;
1596 }
1597 
1598 int trace_array_vprintk(struct trace_array *tr,
1599 			unsigned long ip, const char *fmt, va_list args)
1600 {
1601 	static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1602 	static char trace_buf[TRACE_BUF_SIZE];
1603 
1604 	struct ftrace_event_call *call = &event_print;
1605 	struct ring_buffer_event *event;
1606 	struct ring_buffer *buffer;
1607 	struct trace_array_cpu *data;
1608 	int cpu, len = 0, size, pc;
1609 	struct print_entry *entry;
1610 	unsigned long irq_flags;
1611 	int disable;
1612 
1613 	if (tracing_disabled || tracing_selftest_running)
1614 		return 0;
1615 
1616 	pc = preempt_count();
1617 	preempt_disable_notrace();
1618 	cpu = raw_smp_processor_id();
1619 	data = tr->data[cpu];
1620 
1621 	disable = atomic_inc_return(&data->disabled);
1622 	if (unlikely(disable != 1))
1623 		goto out;
1624 
1625 	pause_graph_tracing();
1626 	raw_local_irq_save(irq_flags);
1627 	arch_spin_lock(&trace_buf_lock);
1628 	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1629 
1630 	size = sizeof(*entry) + len + 1;
1631 	buffer = tr->buffer;
1632 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1633 					  irq_flags, pc);
1634 	if (!event)
1635 		goto out_unlock;
1636 	entry = ring_buffer_event_data(event);
1637 	entry->ip = ip;
1638 
1639 	memcpy(&entry->buf, trace_buf, len);
1640 	entry->buf[len] = '\0';
1641 	if (!filter_check_discard(call, entry, buffer, event)) {
1642 		ring_buffer_unlock_commit(buffer, event);
1643 		ftrace_trace_stack(buffer, irq_flags, 6, pc);
1644 	}
1645 
1646  out_unlock:
1647 	arch_spin_unlock(&trace_buf_lock);
1648 	raw_local_irq_restore(irq_flags);
1649 	unpause_graph_tracing();
1650  out:
1651 	atomic_dec_return(&data->disabled);
1652 	preempt_enable_notrace();
1653 
1654 	return len;
1655 }
1656 
1657 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1658 {
1659 	return trace_array_vprintk(&global_trace, ip, fmt, args);
1660 }
1661 EXPORT_SYMBOL_GPL(trace_vprintk);
1662 
1663 static void trace_iterator_increment(struct trace_iterator *iter)
1664 {
1665 	/* Don't allow ftrace to trace into the ring buffers */
1666 	ftrace_disable_cpu();
1667 
1668 	iter->idx++;
1669 	if (iter->buffer_iter[iter->cpu])
1670 		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1671 
1672 	ftrace_enable_cpu();
1673 }
1674 
1675 static struct trace_entry *
1676 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1677 		unsigned long *lost_events)
1678 {
1679 	struct ring_buffer_event *event;
1680 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1681 
1682 	/* Don't allow ftrace to trace into the ring buffers */
1683 	ftrace_disable_cpu();
1684 
1685 	if (buf_iter)
1686 		event = ring_buffer_iter_peek(buf_iter, ts);
1687 	else
1688 		event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1689 					 lost_events);
1690 
1691 	ftrace_enable_cpu();
1692 
1693 	if (event) {
1694 		iter->ent_size = ring_buffer_event_length(event);
1695 		return ring_buffer_event_data(event);
1696 	}
1697 	iter->ent_size = 0;
1698 	return NULL;
1699 }
1700 
1701 static struct trace_entry *
1702 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1703 		  unsigned long *missing_events, u64 *ent_ts)
1704 {
1705 	struct ring_buffer *buffer = iter->tr->buffer;
1706 	struct trace_entry *ent, *next = NULL;
1707 	unsigned long lost_events = 0, next_lost = 0;
1708 	int cpu_file = iter->cpu_file;
1709 	u64 next_ts = 0, ts;
1710 	int next_cpu = -1;
1711 	int next_size = 0;
1712 	int cpu;
1713 
1714 	/*
1715 	 * If we are in a per_cpu trace file, don't bother iterating over
1716 	 * all cpus; just peek directly.
1717 	 */
1718 	if (cpu_file > TRACE_PIPE_ALL_CPU) {
1719 		if (ring_buffer_empty_cpu(buffer, cpu_file))
1720 			return NULL;
1721 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1722 		if (ent_cpu)
1723 			*ent_cpu = cpu_file;
1724 
1725 		return ent;
1726 	}
1727 
1728 	for_each_tracing_cpu(cpu) {
1729 
1730 		if (ring_buffer_empty_cpu(buffer, cpu))
1731 			continue;
1732 
1733 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1734 
1735 		/*
1736 		 * Pick the entry with the smallest timestamp:
1737 		 */
1738 		if (ent && (!next || ts < next_ts)) {
1739 			next = ent;
1740 			next_cpu = cpu;
1741 			next_ts = ts;
1742 			next_lost = lost_events;
1743 			next_size = iter->ent_size;
1744 		}
1745 	}
1746 
1747 	iter->ent_size = next_size;
1748 
1749 	if (ent_cpu)
1750 		*ent_cpu = next_cpu;
1751 
1752 	if (ent_ts)
1753 		*ent_ts = next_ts;
1754 
1755 	if (missing_events)
1756 		*missing_events = next_lost;
1757 
1758 	return next;
1759 }
1760 
1761 /* Find the next real entry, without updating the iterator itself */
1762 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1763 					  int *ent_cpu, u64 *ent_ts)
1764 {
1765 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1766 }
1767 
1768 /* Find the next real entry, and increment the iterator to the next entry */
1769 void *trace_find_next_entry_inc(struct trace_iterator *iter)
1770 {
1771 	iter->ent = __find_next_entry(iter, &iter->cpu,
1772 				      &iter->lost_events, &iter->ts);
1773 
1774 	if (iter->ent)
1775 		trace_iterator_increment(iter);
1776 
1777 	return iter->ent ? iter : NULL;
1778 }
1779 
1780 static void trace_consume(struct trace_iterator *iter)
1781 {
1782 	/* Don't allow ftrace to trace into the ring buffers */
1783 	ftrace_disable_cpu();
1784 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1785 			    &iter->lost_events);
1786 	ftrace_enable_cpu();
1787 }
1788 
1789 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1790 {
1791 	struct trace_iterator *iter = m->private;
1792 	int i = (int)*pos;
1793 	void *ent;
1794 
1795 	WARN_ON_ONCE(iter->leftover);
1796 
1797 	(*pos)++;
1798 
1799 	/* can't go backwards */
1800 	if (iter->idx > i)
1801 		return NULL;
1802 
1803 	if (iter->idx < 0)
1804 		ent = trace_find_next_entry_inc(iter);
1805 	else
1806 		ent = iter;
1807 
1808 	while (ent && iter->idx < i)
1809 		ent = trace_find_next_entry_inc(iter);
1810 
1811 	iter->pos = *pos;
1812 
1813 	return ent;
1814 }
1815 
1816 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1817 {
1818 	struct trace_array *tr = iter->tr;
1819 	struct ring_buffer_event *event;
1820 	struct ring_buffer_iter *buf_iter;
1821 	unsigned long entries = 0;
1822 	u64 ts;
1823 
1824 	tr->data[cpu]->skipped_entries = 0;
1825 
1826 	if (!iter->buffer_iter[cpu])
1827 		return;
1828 
1829 	buf_iter = iter->buffer_iter[cpu];
1830 	ring_buffer_iter_reset(buf_iter);
1831 
1832 	/*
1833 	 * With the max latency tracers, it is possible that a reset
1834 	 * never took place on a cpu. This is evident when the
1835 	 * timestamp is before the start of the buffer.
1836 	 */
1837 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1838 		if (ts >= iter->tr->time_start)
1839 			break;
1840 		entries++;
1841 		ring_buffer_read(buf_iter, NULL);
1842 	}
1843 
1844 	tr->data[cpu]->skipped_entries = entries;
1845 }
1846 
1847 /*
1848  * The current tracer is copied to avoid using a global lock
1849  * all around.
1850  */
1851 static void *s_start(struct seq_file *m, loff_t *pos)
1852 {
1853 	struct trace_iterator *iter = m->private;
1854 	static struct tracer *old_tracer;
1855 	int cpu_file = iter->cpu_file;
1856 	void *p = NULL;
1857 	loff_t l = 0;
1858 	int cpu;
1859 
1860 	/* copy the tracer to avoid using a global lock all around */
1861 	mutex_lock(&trace_types_lock);
1862 	if (unlikely(old_tracer != current_trace && current_trace)) {
1863 		old_tracer = current_trace;
1864 		*iter->trace = *current_trace;
1865 	}
1866 	mutex_unlock(&trace_types_lock);
1867 
1868 	atomic_inc(&trace_record_cmdline_disabled);
1869 
1870 	if (*pos != iter->pos) {
1871 		iter->ent = NULL;
1872 		iter->cpu = 0;
1873 		iter->idx = -1;
1874 
1875 		ftrace_disable_cpu();
1876 
1877 		if (cpu_file == TRACE_PIPE_ALL_CPU) {
1878 			for_each_tracing_cpu(cpu)
1879 				tracing_iter_reset(iter, cpu);
1880 		} else
1881 			tracing_iter_reset(iter, cpu_file);
1882 
1883 		ftrace_enable_cpu();
1884 
1885 		iter->leftover = 0;
1886 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1887 			;
1888 
1889 	} else {
1890 		/*
1891 		 * If we overflowed the seq_file before, then we want
1892 		 * to just reuse the trace_seq buffer again.
1893 		 */
1894 		if (iter->leftover)
1895 			p = iter;
1896 		else {
1897 			l = *pos - 1;
1898 			p = s_next(m, p, &l);
1899 		}
1900 	}
1901 
1902 	trace_event_read_lock();
1903 	trace_access_lock(cpu_file);
1904 	return p;
1905 }
1906 
1907 static void s_stop(struct seq_file *m, void *p)
1908 {
1909 	struct trace_iterator *iter = m->private;
1910 
1911 	atomic_dec(&trace_record_cmdline_disabled);
1912 	trace_access_unlock(iter->cpu_file);
1913 	trace_event_read_unlock();
1914 }
1915 
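/*
 * Sum up the per-cpu ring buffer counters. "entries" is what is still
 * readable in the buffers; "total" also accounts for entries lost to
 * overruns, except on CPUs where skipped_entries is set, in which case
 * the skipped entries are subtracted from both counts.
 */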
1916 static void
1917 get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
1918 {
1919 	unsigned long count;
1920 	int cpu;
1921 
1922 	*total = 0;
1923 	*entries = 0;
1924 
1925 	for_each_tracing_cpu(cpu) {
1926 		count = ring_buffer_entries_cpu(tr->buffer, cpu);
1927 		/*
1928 		 * If this buffer has skipped entries, then we hold all
1929 		 * entries for the trace and we need to ignore the
1930 		 * ones before the time stamp.
1931 		 */
1932 		if (tr->data[cpu]->skipped_entries) {
1933 			count -= tr->data[cpu]->skipped_entries;
1934 			/* total is the same as the entries */
1935 			*total += count;
1936 		} else
1937 			*total += count +
1938 				ring_buffer_overrun_cpu(tr->buffer, cpu);
1939 		*entries += count;
1940 	}
1941 }
1942 
1943 static void print_lat_help_header(struct seq_file *m)
1944 {
1945 	seq_puts(m, "#                  _------=> CPU#            \n");
1946 	seq_puts(m, "#                 / _-----=> irqs-off        \n");
1947 	seq_puts(m, "#                | / _----=> need-resched    \n");
1948 	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1949 	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1950 	seq_puts(m, "#                |||| /     delay             \n");
1951 	seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1952 	seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
1953 }
1954 
1955 static void print_event_info(struct trace_array *tr, struct seq_file *m)
1956 {
1957 	unsigned long total;
1958 	unsigned long entries;
1959 
1960 	get_total_entries(tr, &total, &entries);
1961 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
1962 		   entries, total, num_online_cpus());
1963 	seq_puts(m, "#\n");
1964 }
1965 
1966 static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
1967 {
1968 	print_event_info(tr, m);
1969 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
1970 	seq_puts(m, "#              | |       |          |         |\n");
1971 }
1972 
1973 static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
1974 {
1975 	print_event_info(tr, m);
1976 	seq_puts(m, "#                              _-----=> irqs-off\n");
1977 	seq_puts(m, "#                             / _----=> need-resched\n");
1978 	seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
1979 	seq_puts(m, "#                            || / _--=> preempt-depth\n");
1980 	seq_puts(m, "#                            ||| /     delay\n");
1981 	seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
1982 	seq_puts(m, "#              | |       |   ||||       |         |\n");
1983 }
1984 
1985 void
1986 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1987 {
1988 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1989 	struct trace_array *tr = iter->tr;
1990 	struct trace_array_cpu *data = tr->data[tr->cpu];
1991 	struct tracer *type = current_trace;
1992 	unsigned long entries;
1993 	unsigned long total;
1994 	const char *name = "preemption";
1995 
1996 	if (type)
1997 		name = type->name;
1998 
1999 	get_total_entries(tr, &total, &entries);
2000 
2001 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2002 		   name, UTS_RELEASE);
2003 	seq_puts(m, "# -----------------------------------"
2004 		 "---------------------------------\n");
2005 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2006 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2007 		   nsecs_to_usecs(data->saved_latency),
2008 		   entries,
2009 		   total,
2010 		   tr->cpu,
2011 #if defined(CONFIG_PREEMPT_NONE)
2012 		   "server",
2013 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2014 		   "desktop",
2015 #elif defined(CONFIG_PREEMPT)
2016 		   "preempt",
2017 #else
2018 		   "unknown",
2019 #endif
2020 		   /* These are reserved for later use */
2021 		   0, 0, 0, 0);
2022 #ifdef CONFIG_SMP
2023 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2024 #else
2025 	seq_puts(m, ")\n");
2026 #endif
2027 	seq_puts(m, "#    -----------------\n");
2028 	seq_printf(m, "#    | task: %.16s-%d "
2029 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2030 		   data->comm, data->pid, data->uid, data->nice,
2031 		   data->policy, data->rt_priority);
2032 	seq_puts(m, "#    -----------------\n");
2033 
2034 	if (data->critical_start) {
2035 		seq_puts(m, "#  => started at: ");
2036 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2037 		trace_print_seq(m, &iter->seq);
2038 		seq_puts(m, "\n#  => ended at:   ");
2039 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2040 		trace_print_seq(m, &iter->seq);
2041 		seq_puts(m, "\n#\n");
2042 	}
2043 
2044 	seq_puts(m, "#\n");
2045 }
2046 
2047 static void test_cpu_buff_start(struct trace_iterator *iter)
2048 {
2049 	struct trace_seq *s = &iter->seq;
2050 
2051 	if (!(trace_flags & TRACE_ITER_ANNOTATE))
2052 		return;
2053 
2054 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2055 		return;
2056 
2057 	if (cpumask_test_cpu(iter->cpu, iter->started))
2058 		return;
2059 
2060 	if (iter->tr->data[iter->cpu]->skipped_entries)
2061 		return;
2062 
2063 	cpumask_set_cpu(iter->cpu, iter->started);
2064 
2065 	/* Don't print a "buffer started" annotation for the first entry of the trace */
2066 	if (iter->idx > 1)
2067 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2068 				iter->cpu);
2069 }
2070 
2071 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2072 {
2073 	struct trace_seq *s = &iter->seq;
2074 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2075 	struct trace_entry *entry;
2076 	struct trace_event *event;
2077 
2078 	entry = iter->ent;
2079 
2080 	test_cpu_buff_start(iter);
2081 
2082 	event = ftrace_find_event(entry->type);
2083 
2084 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2085 		if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2086 			if (!trace_print_lat_context(iter))
2087 				goto partial;
2088 		} else {
2089 			if (!trace_print_context(iter))
2090 				goto partial;
2091 		}
2092 	}
2093 
2094 	if (event)
2095 		return event->funcs->trace(iter, sym_flags, event);
2096 
2097 	if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2098 		goto partial;
2099 
2100 	return TRACE_TYPE_HANDLED;
2101 partial:
2102 	return TRACE_TYPE_PARTIAL_LINE;
2103 }
2104 
2105 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2106 {
2107 	struct trace_seq *s = &iter->seq;
2108 	struct trace_entry *entry;
2109 	struct trace_event *event;
2110 
2111 	entry = iter->ent;
2112 
2113 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2114 		if (!trace_seq_printf(s, "%d %d %llu ",
2115 				      entry->pid, iter->cpu, iter->ts))
2116 			goto partial;
2117 	}
2118 
2119 	event = ftrace_find_event(entry->type);
2120 	if (event)
2121 		return event->funcs->raw(iter, 0, event);
2122 
2123 	if (!trace_seq_printf(s, "%d ?\n", entry->type))
2124 		goto partial;
2125 
2126 	return TRACE_TYPE_HANDLED;
2127 partial:
2128 	return TRACE_TYPE_PARTIAL_LINE;
2129 }
2130 
2131 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2132 {
2133 	struct trace_seq *s = &iter->seq;
2134 	unsigned char newline = '\n';
2135 	struct trace_entry *entry;
2136 	struct trace_event *event;
2137 
2138 	entry = iter->ent;
2139 
2140 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2141 		SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2142 		SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2143 		SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2144 	}
2145 
2146 	event = ftrace_find_event(entry->type);
2147 	if (event) {
2148 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2149 		if (ret != TRACE_TYPE_HANDLED)
2150 			return ret;
2151 	}
2152 
2153 	SEQ_PUT_FIELD_RET(s, newline);
2154 
2155 	return TRACE_TYPE_HANDLED;
2156 }
2157 
2158 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2159 {
2160 	struct trace_seq *s = &iter->seq;
2161 	struct trace_entry *entry;
2162 	struct trace_event *event;
2163 
2164 	entry = iter->ent;
2165 
2166 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2167 		SEQ_PUT_FIELD_RET(s, entry->pid);
2168 		SEQ_PUT_FIELD_RET(s, iter->cpu);
2169 		SEQ_PUT_FIELD_RET(s, iter->ts);
2170 	}
2171 
2172 	event = ftrace_find_event(entry->type);
2173 	return event ? event->funcs->binary(iter, 0, event) :
2174 		TRACE_TYPE_HANDLED;
2175 }
2176 
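/*
 * Return 1 if there is nothing left to read in the buffers covered by
 * the iterator, 0 otherwise. For a per_cpu trace file only that CPU's
 * buffer is checked.
 */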
2177 int trace_empty(struct trace_iterator *iter)
2178 {
2179 	int cpu;
2180 
2181 	/* If we are looking at one CPU buffer, only check that one */
2182 	if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
2183 		cpu = iter->cpu_file;
2184 		if (iter->buffer_iter[cpu]) {
2185 			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
2186 				return 0;
2187 		} else {
2188 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
2189 				return 0;
2190 		}
2191 		return 1;
2192 	}
2193 
2194 	for_each_tracing_cpu(cpu) {
2195 		if (iter->buffer_iter[cpu]) {
2196 			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
2197 				return 0;
2198 		} else {
2199 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
2200 				return 0;
2201 		}
2202 	}
2203 
2204 	return 1;
2205 }
2206 
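/*
 * Format the current entry into iter->seq. A "LOST EVENTS" line is
 * emitted first if events were dropped, then the tracer's own
 * print_line() hook gets a chance, followed by the printk/bprintk
 * message-only shortcuts and the bin/hex/raw/default formats selected
 * by the trace options.
 */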
2207 /*  Called with trace_event_read_lock() held. */
2208 enum print_line_t print_trace_line(struct trace_iterator *iter)
2209 {
2210 	enum print_line_t ret;
2211 
2212 	if (iter->lost_events &&
2213 	    !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2214 				 iter->cpu, iter->lost_events))
2215 		return TRACE_TYPE_PARTIAL_LINE;
2216 
2217 	if (iter->trace && iter->trace->print_line) {
2218 		ret = iter->trace->print_line(iter);
2219 		if (ret != TRACE_TYPE_UNHANDLED)
2220 			return ret;
2221 	}
2222 
2223 	if (iter->ent->type == TRACE_BPRINT &&
2224 			trace_flags & TRACE_ITER_PRINTK &&
2225 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2226 		return trace_print_bprintk_msg_only(iter);
2227 
2228 	if (iter->ent->type == TRACE_PRINT &&
2229 			trace_flags & TRACE_ITER_PRINTK &&
2230 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2231 		return trace_print_printk_msg_only(iter);
2232 
2233 	if (trace_flags & TRACE_ITER_BIN)
2234 		return print_bin_fmt(iter);
2235 
2236 	if (trace_flags & TRACE_ITER_HEX)
2237 		return print_hex_fmt(iter);
2238 
2239 	if (trace_flags & TRACE_ITER_RAW)
2240 		return print_raw_fmt(iter);
2241 
2242 	return print_trace_fmt(iter);
2243 }
2244 
2245 void trace_latency_header(struct seq_file *m)
2246 {
2247 	struct trace_iterator *iter = m->private;
2248 
2249 	/* print nothing if the buffers are empty */
2250 	if (trace_empty(iter))
2251 		return;
2252 
2253 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2254 		print_trace_header(m, iter);
2255 
2256 	if (!(trace_flags & TRACE_ITER_VERBOSE))
2257 		print_lat_help_header(m);
2258 }
2259 
2260 void trace_default_header(struct seq_file *m)
2261 {
2262 	struct trace_iterator *iter = m->private;
2263 
2264 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2265 		return;
2266 
2267 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2268 		/* print nothing if the buffers are empty */
2269 		if (trace_empty(iter))
2270 			return;
2271 		print_trace_header(m, iter);
2272 		if (!(trace_flags & TRACE_ITER_VERBOSE))
2273 			print_lat_help_header(m);
2274 	} else {
2275 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2276 			if (trace_flags & TRACE_ITER_IRQ_INFO)
2277 				print_func_help_header_irq(iter->tr, m);
2278 			else
2279 				print_func_help_header(iter->tr, m);
2280 		}
2281 	}
2282 }
2283 
2284 static void test_ftrace_alive(struct seq_file *m)
2285 {
2286 	if (!ftrace_is_dead())
2287 		return;
2288 	seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2289 	seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2290 }
2291 
2292 static int s_show(struct seq_file *m, void *v)
2293 {
2294 	struct trace_iterator *iter = v;
2295 	int ret;
2296 
2297 	if (iter->ent == NULL) {
2298 		if (iter->tr) {
2299 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
2300 			seq_puts(m, "#\n");
2301 			test_ftrace_alive(m);
2302 		}
2303 		if (iter->trace && iter->trace->print_header)
2304 			iter->trace->print_header(m);
2305 		else
2306 			trace_default_header(m);
2307 
2308 	} else if (iter->leftover) {
2309 		/*
2310 		 * If we filled the seq_file buffer earlier, we
2311 		 * want to just show it now.
2312 		 */
2313 		ret = trace_print_seq(m, &iter->seq);
2314 
2315 		/* ret should this time be zero, but you never know */
2316 		iter->leftover = ret;
2317 
2318 	} else {
2319 		print_trace_line(iter);
2320 		ret = trace_print_seq(m, &iter->seq);
2321 		/*
2322 		 * If we overflow the seq_file buffer, then it will
2323 		 * ask us for this data again at start up.
2324 		 * Use that instead.
2325 		 *  ret is 0 if seq_file write succeeded.
2326 		 *        -1 otherwise.
2327 		 */
2328 		iter->leftover = ret;
2329 	}
2330 
2331 	return 0;
2332 }
2333 
2334 static const struct seq_operations tracer_seq_ops = {
2335 	.start		= s_start,
2336 	.next		= s_next,
2337 	.stop		= s_stop,
2338 	.show		= s_show,
2339 };
2340 
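/*
 * Set up the iterator used to read the "trace" file: allocate it, take
 * a private copy of the current tracer, stop tracing while the file is
 * open, and create ring buffer iterators for the requested CPU (or for
 * every CPU when the whole trace is opened).
 */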
2341 static struct trace_iterator *
2342 __tracing_open(struct inode *inode, struct file *file)
2343 {
2344 	long cpu_file = (long) inode->i_private;
2345 	void *fail_ret = ERR_PTR(-ENOMEM);
2346 	struct trace_iterator *iter;
2347 	struct seq_file *m;
2348 	int cpu, ret;
2349 
2350 	if (tracing_disabled)
2351 		return ERR_PTR(-ENODEV);
2352 
2353 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2354 	if (!iter)
2355 		return ERR_PTR(-ENOMEM);
2356 
2357 	/*
2358 	 * We make a copy of the current tracer to avoid concurrent
2359 	 * changes to it while we are reading.
2360 	 */
2361 	mutex_lock(&trace_types_lock);
2362 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2363 	if (!iter->trace)
2364 		goto fail;
2365 
2366 	if (current_trace)
2367 		*iter->trace = *current_trace;
2368 
2369 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2370 		goto fail;
2371 
2372 	if (current_trace && current_trace->print_max)
2373 		iter->tr = &max_tr;
2374 	else
2375 		iter->tr = &global_trace;
2376 	iter->pos = -1;
2377 	mutex_init(&iter->mutex);
2378 	iter->cpu_file = cpu_file;
2379 
2380 	/* Notify the tracer early; before we stop tracing. */
2381 	if (iter->trace && iter->trace->open)
2382 		iter->trace->open(iter);
2383 
2384 	/* Annotate start of buffers if we had overruns */
2385 	if (ring_buffer_overruns(iter->tr->buffer))
2386 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
2387 
2388 	/* stop the trace while dumping */
2389 	tracing_stop();
2390 
2391 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2392 		for_each_tracing_cpu(cpu) {
2393 			iter->buffer_iter[cpu] =
2394 				ring_buffer_read_prepare(iter->tr->buffer, cpu);
2395 		}
2396 		ring_buffer_read_prepare_sync();
2397 		for_each_tracing_cpu(cpu) {
2398 			ring_buffer_read_start(iter->buffer_iter[cpu]);
2399 			tracing_iter_reset(iter, cpu);
2400 		}
2401 	} else {
2402 		cpu = iter->cpu_file;
2403 		iter->buffer_iter[cpu] =
2404 			ring_buffer_read_prepare(iter->tr->buffer, cpu);
2405 		ring_buffer_read_prepare_sync();
2406 		ring_buffer_read_start(iter->buffer_iter[cpu]);
2407 		tracing_iter_reset(iter, cpu);
2408 	}
2409 
2410 	ret = seq_open(file, &tracer_seq_ops);
2411 	if (ret < 0) {
2412 		fail_ret = ERR_PTR(ret);
2413 		goto fail_buffer;
2414 	}
2415 
2416 	m = file->private_data;
2417 	m->private = iter;
2418 
2419 	mutex_unlock(&trace_types_lock);
2420 
2421 	return iter;
2422 
2423  fail_buffer:
2424 	for_each_tracing_cpu(cpu) {
2425 		if (iter->buffer_iter[cpu])
2426 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
2427 	}
2428 	free_cpumask_var(iter->started);
2429 	tracing_start();
2430  fail:
2431 	mutex_unlock(&trace_types_lock);
2432 	kfree(iter->trace);
2433 	kfree(iter);
2434 
2435 	return fail_ret;
2436 }
2437 
2438 int tracing_open_generic(struct inode *inode, struct file *filp)
2439 {
2440 	if (tracing_disabled)
2441 		return -ENODEV;
2442 
2443 	filp->private_data = inode->i_private;
2444 	return 0;
2445 }
2446 
2447 static int tracing_release(struct inode *inode, struct file *file)
2448 {
2449 	struct seq_file *m = file->private_data;
2450 	struct trace_iterator *iter;
2451 	int cpu;
2452 
2453 	if (!(file->f_mode & FMODE_READ))
2454 		return 0;
2455 
2456 	iter = m->private;
2457 
2458 	mutex_lock(&trace_types_lock);
2459 	for_each_tracing_cpu(cpu) {
2460 		if (iter->buffer_iter[cpu])
2461 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
2462 	}
2463 
2464 	if (iter->trace && iter->trace->close)
2465 		iter->trace->close(iter);
2466 
2467 	/* reenable tracing if it was previously enabled */
2468 	tracing_start();
2469 	mutex_unlock(&trace_types_lock);
2470 
2471 	seq_release(inode, file);
2472 	mutex_destroy(&iter->mutex);
2473 	free_cpumask_var(iter->started);
2474 	kfree(iter->trace);
2475 	kfree(iter);
2476 	return 0;
2477 }
2478 
2479 static int tracing_open(struct inode *inode, struct file *file)
2480 {
2481 	struct trace_iterator *iter;
2482 	int ret = 0;
2483 
2484 	/* If this file was open for write, then erase contents */
2485 	if ((file->f_mode & FMODE_WRITE) &&
2486 	    (file->f_flags & O_TRUNC)) {
2487 		long cpu = (long) inode->i_private;
2488 
2489 		if (cpu == TRACE_PIPE_ALL_CPU)
2490 			tracing_reset_online_cpus(&global_trace);
2491 		else
2492 			tracing_reset(&global_trace, cpu);
2493 	}
2494 
2495 	if (file->f_mode & FMODE_READ) {
2496 		iter = __tracing_open(inode, file);
2497 		if (IS_ERR(iter))
2498 			ret = PTR_ERR(iter);
2499 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2500 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
2501 	}
2502 	return ret;
2503 }
2504 
2505 static void *
2506 t_next(struct seq_file *m, void *v, loff_t *pos)
2507 {
2508 	struct tracer *t = v;
2509 
2510 	(*pos)++;
2511 
2512 	if (t)
2513 		t = t->next;
2514 
2515 	return t;
2516 }
2517 
2518 static void *t_start(struct seq_file *m, loff_t *pos)
2519 {
2520 	struct tracer *t;
2521 	loff_t l = 0;
2522 
2523 	mutex_lock(&trace_types_lock);
2524 	for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2525 		;
2526 
2527 	return t;
2528 }
2529 
2530 static void t_stop(struct seq_file *m, void *p)
2531 {
2532 	mutex_unlock(&trace_types_lock);
2533 }
2534 
2535 static int t_show(struct seq_file *m, void *v)
2536 {
2537 	struct tracer *t = v;
2538 
2539 	if (!t)
2540 		return 0;
2541 
2542 	seq_printf(m, "%s", t->name);
2543 	if (t->next)
2544 		seq_putc(m, ' ');
2545 	else
2546 		seq_putc(m, '\n');
2547 
2548 	return 0;
2549 }
2550 
2551 static const struct seq_operations show_traces_seq_ops = {
2552 	.start		= t_start,
2553 	.next		= t_next,
2554 	.stop		= t_stop,
2555 	.show		= t_show,
2556 };
2557 
2558 static int show_traces_open(struct inode *inode, struct file *file)
2559 {
2560 	if (tracing_disabled)
2561 		return -ENODEV;
2562 
2563 	return seq_open(file, &show_traces_seq_ops);
2564 }
2565 
2566 static ssize_t
2567 tracing_write_stub(struct file *filp, const char __user *ubuf,
2568 		   size_t count, loff_t *ppos)
2569 {
2570 	return count;
2571 }
2572 
2573 static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
2574 {
2575 	if (file->f_mode & FMODE_READ)
2576 		return seq_lseek(file, offset, origin);
2577 	else
2578 		return 0;
2579 }
2580 
2581 static const struct file_operations tracing_fops = {
2582 	.open		= tracing_open,
2583 	.read		= seq_read,
2584 	.write		= tracing_write_stub,
2585 	.llseek		= tracing_seek,
2586 	.release	= tracing_release,
2587 };
2588 
2589 static const struct file_operations show_traces_fops = {
2590 	.open		= show_traces_open,
2591 	.read		= seq_read,
2592 	.release	= seq_release,
2593 	.llseek		= seq_lseek,
2594 };
2595 
2596 /*
2597  * Only trace on a CPU if the bitmask is set:
2598  */
2599 static cpumask_var_t tracing_cpumask;
2600 
2601 /*
2602  * The tracer itself will not take this lock, but still we want
2603  * to provide a consistent cpumask to user-space:
2604  */
2605 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2606 
2607 /*
2608  * Temporary storage for the character representation of the
2609  * CPU bitmask (and one more byte for the newline):
2610  */
2611 static char mask_str[NR_CPUS + 1];
2612 
2613 static ssize_t
2614 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2615 		     size_t count, loff_t *ppos)
2616 {
2617 	int len;
2618 
2619 	mutex_lock(&tracing_cpumask_update_lock);
2620 
2621 	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2622 	if (count - len < 2) {
2623 		count = -EINVAL;
2624 		goto out_err;
2625 	}
2626 	len += sprintf(mask_str + len, "\n");
2627 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2628 
2629 out_err:
2630 	mutex_unlock(&tracing_cpumask_update_lock);
2631 
2632 	return count;
2633 }
2634 
2635 static ssize_t
2636 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2637 		      size_t count, loff_t *ppos)
2638 {
2639 	int err, cpu;
2640 	cpumask_var_t tracing_cpumask_new;
2641 
2642 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2643 		return -ENOMEM;
2644 
2645 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2646 	if (err)
2647 		goto err_unlock;
2648 
2649 	mutex_lock(&tracing_cpumask_update_lock);
2650 
2651 	local_irq_disable();
2652 	arch_spin_lock(&ftrace_max_lock);
2653 	for_each_tracing_cpu(cpu) {
2654 		/*
2655 		 * Increase/decrease the disabled counter if we are
2656 		 * about to flip a bit in the cpumask:
2657 		 */
2658 		if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2659 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2660 			atomic_inc(&global_trace.data[cpu]->disabled);
2661 			ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
2662 		}
2663 		if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2664 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2665 			atomic_dec(&global_trace.data[cpu]->disabled);
2666 			ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
2667 		}
2668 	}
2669 	arch_spin_unlock(&ftrace_max_lock);
2670 	local_irq_enable();
2671 
2672 	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2673 
2674 	mutex_unlock(&tracing_cpumask_update_lock);
2675 	free_cpumask_var(tracing_cpumask_new);
2676 
2677 	return count;
2678 
2679 err_unlock:
2680 	free_cpumask_var(tracing_cpumask_new);
2681 
2682 	return err;
2683 }
2684 
2685 static const struct file_operations tracing_cpumask_fops = {
2686 	.open		= tracing_open_generic,
2687 	.read		= tracing_cpumask_read,
2688 	.write		= tracing_cpumask_write,
2689 	.llseek		= generic_file_llseek,
2690 };
2691 
2692 static int tracing_trace_options_show(struct seq_file *m, void *v)
2693 {
2694 	struct tracer_opt *trace_opts;
2695 	u32 tracer_flags;
2696 	int i;
2697 
2698 	mutex_lock(&trace_types_lock);
2699 	tracer_flags = current_trace->flags->val;
2700 	trace_opts = current_trace->flags->opts;
2701 
2702 	for (i = 0; trace_options[i]; i++) {
2703 		if (trace_flags & (1 << i))
2704 			seq_printf(m, "%s\n", trace_options[i]);
2705 		else
2706 			seq_printf(m, "no%s\n", trace_options[i]);
2707 	}
2708 
2709 	for (i = 0; trace_opts[i].name; i++) {
2710 		if (tracer_flags & trace_opts[i].bit)
2711 			seq_printf(m, "%s\n", trace_opts[i].name);
2712 		else
2713 			seq_printf(m, "no%s\n", trace_opts[i].name);
2714 	}
2715 	mutex_unlock(&trace_types_lock);
2716 
2717 	return 0;
2718 }
2719 
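/*
 * Apply a single tracer-specific option: give the tracer a chance to
 * act on (or refuse) the change through its set_flag() callback, then
 * update the corresponding bit in tracer_flags->val.
 */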
2720 static int __set_tracer_option(struct tracer *trace,
2721 			       struct tracer_flags *tracer_flags,
2722 			       struct tracer_opt *opts, int neg)
2723 {
2724 	int ret;
2725 
2726 	ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2727 	if (ret)
2728 		return ret;
2729 
2730 	if (neg)
2731 		tracer_flags->val &= ~opts->bit;
2732 	else
2733 		tracer_flags->val |= opts->bit;
2734 	return 0;
2735 }
2736 
2737 /* Try to assign a tracer specific option */
2738 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2739 {
2740 	struct tracer_flags *tracer_flags = trace->flags;
2741 	struct tracer_opt *opts = NULL;
2742 	int i;
2743 
2744 	for (i = 0; tracer_flags->opts[i].name; i++) {
2745 		opts = &tracer_flags->opts[i];
2746 
2747 		if (strcmp(cmp, opts->name) == 0)
2748 			return __set_tracer_option(trace, trace->flags,
2749 						   opts, neg);
2750 	}
2751 
2752 	return -EINVAL;
2753 }
2754 
2755 /* Some tracers require overwrite to stay enabled */
2756 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
2757 {
2758 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
2759 		return -1;
2760 
2761 	return 0;
2762 }
2763 
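/*
 * Set or clear one of the global trace_flags bits. The current tracer
 * may veto the change through ->flag_changed, and the RECORD_CMD and
 * OVERWRITE flags need extra work beyond flipping the bit.
 */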
2764 int set_tracer_flag(unsigned int mask, int enabled)
2765 {
2766 	/* do nothing if flag is already set */
2767 	if (!!(trace_flags & mask) == !!enabled)
2768 		return 0;
2769 
2770 	/* Give the tracer a chance to approve the change */
2771 	if (current_trace->flag_changed)
2772 		if (current_trace->flag_changed(current_trace, mask, !!enabled))
2773 			return -EINVAL;
2774 
2775 	if (enabled)
2776 		trace_flags |= mask;
2777 	else
2778 		trace_flags &= ~mask;
2779 
2780 	if (mask == TRACE_ITER_RECORD_CMD)
2781 		trace_event_enable_cmd_record(enabled);
2782 
2783 	if (mask == TRACE_ITER_OVERWRITE) {
2784 		ring_buffer_change_overwrite(global_trace.buffer, enabled);
2785 #ifdef CONFIG_TRACER_MAX_TRACE
2786 		ring_buffer_change_overwrite(max_tr.buffer, enabled);
2787 #endif
2788 	}
2789 
2790 	return 0;
2791 }
2792 
2793 static ssize_t
2794 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2795 			size_t cnt, loff_t *ppos)
2796 {
2797 	char buf[64];
2798 	char *cmp;
2799 	int neg = 0;
2800 	int ret = -ENODEV;
2801 	int i;
2802 
2803 	if (cnt >= sizeof(buf))
2804 		return -EINVAL;
2805 
2806 	if (copy_from_user(&buf, ubuf, cnt))
2807 		return -EFAULT;
2808 
2809 	buf[cnt] = 0;
2810 	cmp = strstrip(buf);
2811 
2812 	if (strncmp(cmp, "no", 2) == 0) {
2813 		neg = 1;
2814 		cmp += 2;
2815 	}
2816 
2817 	mutex_lock(&trace_types_lock);
2818 
2819 	for (i = 0; trace_options[i]; i++) {
2820 		if (strcmp(cmp, trace_options[i]) == 0) {
2821 			ret = set_tracer_flag(1 << i, !neg);
2822 			break;
2823 		}
2824 	}
2825 
2826 	/* If no option could be set, test the specific tracer options */
2827 	if (!trace_options[i])
2828 		ret = set_tracer_option(current_trace, cmp, neg);
2829 
2830 	mutex_unlock(&trace_types_lock);
2831 
2832 	if (ret < 0)
2833 		return ret;
2834 
2835 	*ppos += cnt;
2836 
2837 	return cnt;
2838 }
2839 
2840 static int tracing_trace_options_open(struct inode *inode, struct file *file)
2841 {
2842 	if (tracing_disabled)
2843 		return -ENODEV;
2844 	return single_open(file, tracing_trace_options_show, NULL);
2845 }
2846 
2847 static const struct file_operations tracing_iter_fops = {
2848 	.open		= tracing_trace_options_open,
2849 	.read		= seq_read,
2850 	.llseek		= seq_lseek,
2851 	.release	= single_release,
2852 	.write		= tracing_trace_options_write,
2853 };
2854 
2855 static const char readme_msg[] =
2856 	"tracing mini-HOWTO:\n\n"
2857 	"# mount -t debugfs nodev /sys/kernel/debug\n\n"
2858 	"# cat /sys/kernel/debug/tracing/available_tracers\n"
2859 	"wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n"
2860 	"# cat /sys/kernel/debug/tracing/current_tracer\n"
2861 	"nop\n"
2862 	"# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n"
2863 	"# cat /sys/kernel/debug/tracing/current_tracer\n"
2864 	"wakeup\n"
2865 	"# cat /sys/kernel/debug/tracing/trace_options\n"
2866 	"noprint-parent nosym-offset nosym-addr noverbose\n"
2867 	"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
2868 	"# echo 1 > /sys/kernel/debug/tracing/tracing_on\n"
2869 	"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
2870 	"# echo 0 > /sys/kernel/debug/tracing/tracing_on\n"
2871 ;
2872 
2873 static ssize_t
2874 tracing_readme_read(struct file *filp, char __user *ubuf,
2875 		       size_t cnt, loff_t *ppos)
2876 {
2877 	return simple_read_from_buffer(ubuf, cnt, ppos,
2878 					readme_msg, strlen(readme_msg));
2879 }
2880 
2881 static const struct file_operations tracing_readme_fops = {
2882 	.open		= tracing_open_generic,
2883 	.read		= tracing_readme_read,
2884 	.llseek		= generic_file_llseek,
2885 };
2886 
2887 static ssize_t
2888 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2889 				size_t cnt, loff_t *ppos)
2890 {
2891 	char *buf_comm;
2892 	char *file_buf;
2893 	char *buf;
2894 	int len = 0;
2895 	int pid;
2896 	int i;
2897 
2898 	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2899 	if (!file_buf)
2900 		return -ENOMEM;
2901 
2902 	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2903 	if (!buf_comm) {
2904 		kfree(file_buf);
2905 		return -ENOMEM;
2906 	}
2907 
2908 	buf = file_buf;
2909 
2910 	for (i = 0; i < SAVED_CMDLINES; i++) {
2911 		int r;
2912 
2913 		pid = map_cmdline_to_pid[i];
2914 		if (pid == -1 || pid == NO_CMDLINE_MAP)
2915 			continue;
2916 
2917 		trace_find_cmdline(pid, buf_comm);
2918 		r = sprintf(buf, "%d %s\n", pid, buf_comm);
2919 		buf += r;
2920 		len += r;
2921 	}
2922 
2923 	len = simple_read_from_buffer(ubuf, cnt, ppos,
2924 				      file_buf, len);
2925 
2926 	kfree(file_buf);
2927 	kfree(buf_comm);
2928 
2929 	return len;
2930 }
2931 
2932 static const struct file_operations tracing_saved_cmdlines_fops = {
2933     .open       = tracing_open_generic,
2934     .read       = tracing_saved_cmdlines_read,
2935     .llseek	= generic_file_llseek,
2936 };
2937 
2938 static ssize_t
2939 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2940 		  size_t cnt, loff_t *ppos)
2941 {
2942 	char buf[64];
2943 	int r;
2944 
2945 	r = sprintf(buf, "%u\n", tracer_enabled);
2946 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2947 }
2948 
2949 static ssize_t
2950 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2951 		   size_t cnt, loff_t *ppos)
2952 {
2953 	struct trace_array *tr = filp->private_data;
2954 	unsigned long val;
2955 	int ret;
2956 
2957 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
2958 	if (ret)
2959 		return ret;
2960 
2961 	val = !!val;
2962 
2963 	mutex_lock(&trace_types_lock);
2964 	if (tracer_enabled ^ val) {
2965 
2966 		/* Only need to warn if this is used to change the state */
2967 		WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
2968 
2969 		if (val) {
2970 			tracer_enabled = 1;
2971 			if (current_trace->start)
2972 				current_trace->start(tr);
2973 			tracing_start();
2974 		} else {
2975 			tracer_enabled = 0;
2976 			tracing_stop();
2977 			if (current_trace->stop)
2978 				current_trace->stop(tr);
2979 		}
2980 	}
2981 	mutex_unlock(&trace_types_lock);
2982 
2983 	*ppos += cnt;
2984 
2985 	return cnt;
2986 }
2987 
2988 static ssize_t
2989 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2990 		       size_t cnt, loff_t *ppos)
2991 {
2992 	char buf[MAX_TRACER_SIZE+2];
2993 	int r;
2994 
2995 	mutex_lock(&trace_types_lock);
2996 	if (current_trace)
2997 		r = sprintf(buf, "%s\n", current_trace->name);
2998 	else
2999 		r = sprintf(buf, "\n");
3000 	mutex_unlock(&trace_types_lock);
3001 
3002 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3003 }
3004 
3005 int tracer_init(struct tracer *t, struct trace_array *tr)
3006 {
3007 	tracing_reset_online_cpus(tr);
3008 	return t->init(tr);
3009 }
3010 
3011 static int __tracing_resize_ring_buffer(unsigned long size)
3012 {
3013 	int ret;
3014 
3015 	/*
3016 	 * If kernel or user changes the size of the ring buffer
3017 	 * we use the size that was given, and we can forget about
3018 	 * expanding it later.
3019 	 */
3020 	ring_buffer_expanded = 1;
3021 
3022 	ret = ring_buffer_resize(global_trace.buffer, size);
3023 	if (ret < 0)
3024 		return ret;
3025 
3026 	if (!current_trace->use_max_tr)
3027 		goto out;
3028 
3029 	ret = ring_buffer_resize(max_tr.buffer, size);
3030 	if (ret < 0) {
3031 		int r;
3032 
3033 		r = ring_buffer_resize(global_trace.buffer,
3034 				       global_trace.entries);
3035 		if (r < 0) {
3036 			/*
3037 			 * AARGH! We are left with different
3038 			 * size max buffer!!!!
3039 			 * The max buffer is our "snapshot" buffer.
3040 			 * When a tracer needs a snapshot (one of the
3041 			 * latency tracers), it swaps the max buffer
3042 			 * with the saved snapshot. We succeeded in
3043 			 * updating the size of the main buffer, but failed to
3044 			 * update the size of the max buffer. But when we tried
3045 			 * to reset the main buffer to the original size, we
3046 			 * failed there too. This is very unlikely to
3047 			 * happen, but if it does, warn and kill all
3048 			 * tracing.
3049 			 */
3050 			WARN_ON(1);
3051 			tracing_disabled = 1;
3052 		}
3053 		return ret;
3054 	}
3055 
3056 	max_tr.entries = size;
3057  out:
3058 	global_trace.entries = size;
3059 
3060 	return ret;
3061 }
3062 
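/*
 * Resize the ring buffer with tracing stopped and every per-cpu buffer
 * disabled, so that nothing records into the buffers while they are
 * being resized.
 */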
3063 static ssize_t tracing_resize_ring_buffer(unsigned long size)
3064 {
3065 	int cpu, ret = size;
3066 
3067 	mutex_lock(&trace_types_lock);
3068 
3069 	tracing_stop();
3070 
3071 	/* disable all cpu buffers */
3072 	for_each_tracing_cpu(cpu) {
3073 		if (global_trace.data[cpu])
3074 			atomic_inc(&global_trace.data[cpu]->disabled);
3075 		if (max_tr.data[cpu])
3076 			atomic_inc(&max_tr.data[cpu]->disabled);
3077 	}
3078 
3079 	if (size != global_trace.entries)
3080 		ret = __tracing_resize_ring_buffer(size);
3081 
3082 	if (ret < 0)
3083 		ret = -ENOMEM;
3084 
3085 	for_each_tracing_cpu(cpu) {
3086 		if (global_trace.data[cpu])
3087 			atomic_dec(&global_trace.data[cpu]->disabled);
3088 		if (max_tr.data[cpu])
3089 			atomic_dec(&max_tr.data[cpu]->disabled);
3090 	}
3091 
3092 	tracing_start();
3093 	mutex_unlock(&trace_types_lock);
3094 
3095 	return ret;
3096 }
3097 
3098 
3099 /**
3100  * tracing_update_buffers - used by tracing facility to expand ring buffers
3101  *
3102  * To save memory when tracing is never used on a system that has it
3103  * configured in, the ring buffers are set to a minimum size. Once
3104  * a user starts to use the tracing facility, they need to grow
3105  * to their default size.
3106  *
3107  * This function is to be called when a tracer is about to be used.
3108  */
3109 int tracing_update_buffers(void)
3110 {
3111 	int ret = 0;
3112 
3113 	mutex_lock(&trace_types_lock);
3114 	if (!ring_buffer_expanded)
3115 		ret = __tracing_resize_ring_buffer(trace_buf_size);
3116 	mutex_unlock(&trace_types_lock);
3117 
3118 	return ret;
3119 }
3120 
3121 struct trace_option_dentry;
3122 
3123 static struct trace_option_dentry *
3124 create_trace_option_files(struct tracer *tracer);
3125 
3126 static void
3127 destroy_trace_option_files(struct trace_option_dentry *topts);
3128 
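/*
 * Switch to the tracer named by @buf: make sure the ring buffer is at
 * full size, look the tracer up in trace_types, tear down the old
 * tracer (shrinking max_tr if it is no longer needed), rebuild the
 * tracer-specific option files and initialize the new tracer.
 */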
3129 static int tracing_set_tracer(const char *buf)
3130 {
3131 	static struct trace_option_dentry *topts;
3132 	struct trace_array *tr = &global_trace;
3133 	struct tracer *t;
3134 	int ret = 0;
3135 
3136 	mutex_lock(&trace_types_lock);
3137 
3138 	if (!ring_buffer_expanded) {
3139 		ret = __tracing_resize_ring_buffer(trace_buf_size);
3140 		if (ret < 0)
3141 			goto out;
3142 		ret = 0;
3143 	}
3144 
3145 	for (t = trace_types; t; t = t->next) {
3146 		if (strcmp(t->name, buf) == 0)
3147 			break;
3148 	}
3149 	if (!t) {
3150 		ret = -EINVAL;
3151 		goto out;
3152 	}
3153 	if (t == current_trace)
3154 		goto out;
3155 
3156 	trace_branch_disable();
3157 
3158 	current_trace->enabled = false;
3159 
3160 	if (current_trace && current_trace->reset)
3161 		current_trace->reset(tr);
3162 	if (current_trace && current_trace->use_max_tr) {
3163 		/*
3164 		 * We don't free the ring buffer. Instead, we resize it because
3165 		 * the max_tr ring buffer has some state (e.g. ring->clock) and
3166 		 * we want to preserve it.
3167 		 */
3168 		ring_buffer_resize(max_tr.buffer, 1);
3169 		max_tr.entries = 1;
3170 	}
3171 	destroy_trace_option_files(topts);
3172 
3173 	current_trace = t;
3174 
3175 	topts = create_trace_option_files(current_trace);
3176 	if (current_trace->use_max_tr) {
3177 		ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
3178 		if (ret < 0)
3179 			goto out;
3180 		max_tr.entries = global_trace.entries;
3181 	}
3182 
3183 	if (t->init) {
3184 		ret = tracer_init(t, tr);
3185 		if (ret)
3186 			goto out;
3187 	}
3188 
3189 	current_trace->enabled = true;
3190 	trace_branch_enable(tr);
3191  out:
3192 	mutex_unlock(&trace_types_lock);
3193 
3194 	return ret;
3195 }
3196 
3197 static ssize_t
3198 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3199 			size_t cnt, loff_t *ppos)
3200 {
3201 	char buf[MAX_TRACER_SIZE+1];
3202 	int i;
3203 	size_t ret;
3204 	int err;
3205 
3206 	ret = cnt;
3207 
3208 	if (cnt > MAX_TRACER_SIZE)
3209 		cnt = MAX_TRACER_SIZE;
3210 
3211 	if (copy_from_user(&buf, ubuf, cnt))
3212 		return -EFAULT;
3213 
3214 	buf[cnt] = 0;
3215 
3216 	/* strip ending whitespace. */
3217 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3218 		buf[i] = 0;
3219 
3220 	err = tracing_set_tracer(buf);
3221 	if (err)
3222 		return err;
3223 
3224 	*ppos += ret;
3225 
3226 	return ret;
3227 }
3228 
3229 static ssize_t
3230 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3231 		     size_t cnt, loff_t *ppos)
3232 {
3233 	unsigned long *ptr = filp->private_data;
3234 	char buf[64];
3235 	int r;
3236 
3237 	r = snprintf(buf, sizeof(buf), "%ld\n",
3238 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3239 	if (r > sizeof(buf))
3240 		r = sizeof(buf);
3241 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3242 }
3243 
3244 static ssize_t
3245 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3246 		      size_t cnt, loff_t *ppos)
3247 {
3248 	unsigned long *ptr = filp->private_data;
3249 	unsigned long val;
3250 	int ret;
3251 
3252 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3253 	if (ret)
3254 		return ret;
3255 
3256 	*ptr = val * 1000;
3257 
3258 	return cnt;
3259 }
3260 
3261 static int tracing_open_pipe(struct inode *inode, struct file *filp)
3262 {
3263 	long cpu_file = (long) inode->i_private;
3264 	struct trace_iterator *iter;
3265 	int ret = 0;
3266 
3267 	if (tracing_disabled)
3268 		return -ENODEV;
3269 
3270 	mutex_lock(&trace_types_lock);
3271 
3272 	/* create a buffer to store the information to pass to userspace */
3273 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3274 	if (!iter) {
3275 		ret = -ENOMEM;
3276 		goto out;
3277 	}
3278 
3279 	/*
3280 	 * We make a copy of the current tracer to avoid concurrent
3281 	 * changes to it while we are reading.
3282 	 */
3283 	iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
3284 	if (!iter->trace) {
3285 		ret = -ENOMEM;
3286 		goto fail;
3287 	}
3288 	if (current_trace)
3289 		*iter->trace = *current_trace;
3290 
3291 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
3292 		ret = -ENOMEM;
3293 		goto fail;
3294 	}
3295 
3296 	/* trace pipe does not show start of buffer */
3297 	cpumask_setall(iter->started);
3298 
3299 	if (trace_flags & TRACE_ITER_LATENCY_FMT)
3300 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
3301 
3302 	iter->cpu_file = cpu_file;
3303 	iter->tr = &global_trace;
3304 	mutex_init(&iter->mutex);
3305 	filp->private_data = iter;
3306 
3307 	if (iter->trace->pipe_open)
3308 		iter->trace->pipe_open(iter);
3309 
3310 	nonseekable_open(inode, filp);
3311 out:
3312 	mutex_unlock(&trace_types_lock);
3313 	return ret;
3314 
3315 fail:
3316 	kfree(iter->trace);
3317 	kfree(iter);
3318 	mutex_unlock(&trace_types_lock);
3319 	return ret;
3320 }
3321 
3322 static int tracing_release_pipe(struct inode *inode, struct file *file)
3323 {
3324 	struct trace_iterator *iter = file->private_data;
3325 
3326 	mutex_lock(&trace_types_lock);
3327 
3328 	if (iter->trace->pipe_close)
3329 		iter->trace->pipe_close(iter);
3330 
3331 	mutex_unlock(&trace_types_lock);
3332 
3333 	free_cpumask_var(iter->started);
3334 	mutex_destroy(&iter->mutex);
3335 	kfree(iter->trace);
3336 	kfree(iter);
3337 
3338 	return 0;
3339 }
3340 
3341 static unsigned int
3342 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3343 {
3344 	struct trace_iterator *iter = filp->private_data;
3345 
3346 	if (trace_flags & TRACE_ITER_BLOCK) {
3347 		/*
3348 		 * Always select as readable when in blocking mode
3349 		 */
3350 		return POLLIN | POLLRDNORM;
3351 	} else {
3352 		if (!trace_empty(iter))
3353 			return POLLIN | POLLRDNORM;
3354 		poll_wait(filp, &trace_wait, poll_table);
3355 		if (!trace_empty(iter))
3356 			return POLLIN | POLLRDNORM;
3357 
3358 		return 0;
3359 	}
3360 }
3361 
3362 
3363 void default_wait_pipe(struct trace_iterator *iter)
3364 {
3365 	DEFINE_WAIT(wait);
3366 
3367 	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
3368 
3369 	if (trace_empty(iter))
3370 		schedule();
3371 
3372 	finish_wait(&trace_wait, &wait);
3373 }
3374 
3375 /*
3376  * This is a make-shift waitqueue.
3377  * A tracer might use this callback in some rare cases:
3378  *
3379  *  1) the current tracer might hold the runqueue lock when it wakes up
3380  *     a reader, hence a deadlock (sched, function, and function graph tracers)
3381  *  2) the function tracers trace all functions, and we don't want
3382  *     the overhead of calling wake_up and friends
3383  *     (and tracing them too)
3384  *
3385  *     Anyway, this is a really primitive wakeup.
3386  */
3387 void poll_wait_pipe(struct trace_iterator *iter)
3388 {
3389 	set_current_state(TASK_INTERRUPTIBLE);
3390 	/* sleep for 100 msecs, and try again. */
3391 	schedule_timeout(HZ / 10);
3392 }
3393 
3394 /* Must be called with trace_types_lock mutex held. */
3395 static int tracing_wait_pipe(struct file *filp)
3396 {
3397 	struct trace_iterator *iter = filp->private_data;
3398 
3399 	while (trace_empty(iter)) {
3400 
3401 		if ((filp->f_flags & O_NONBLOCK)) {
3402 			return -EAGAIN;
3403 		}
3404 
3405 		mutex_unlock(&iter->mutex);
3406 
3407 		iter->trace->wait_pipe(iter);
3408 
3409 		mutex_lock(&iter->mutex);
3410 
3411 		if (signal_pending(current))
3412 			return -EINTR;
3413 
3414 		/*
3415 		 * We block until we read something and tracing is disabled.
3416 		 * We still block if tracing is disabled, but we have never
3417 		 * read anything. This allows a user to cat this file, and
3418 		 * then enable tracing. But after we have read something,
3419 		 * we give an EOF when tracing is again disabled.
3420 		 *
3421 		 * iter->pos will be 0 if we haven't read anything.
3422 		 */
3423 		if (!tracer_enabled && iter->pos)
3424 			break;
3425 	}
3426 
3427 	return 1;
3428 }
3429 
3430 /*
3431  * Consumer reader.
3432  */
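/*
 * Any formatted output left over from a previous read is returned
 * first. Otherwise wait until the buffer has entries, format lines
 * into iter->seq while consuming them from the ring buffer, and copy
 * the result to the user buffer.
 */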
3433 static ssize_t
3434 tracing_read_pipe(struct file *filp, char __user *ubuf,
3435 		  size_t cnt, loff_t *ppos)
3436 {
3437 	struct trace_iterator *iter = filp->private_data;
3438 	static struct tracer *old_tracer;
3439 	ssize_t sret;
3440 
3441 	/* return any leftover data */
3442 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3443 	if (sret != -EBUSY)
3444 		return sret;
3445 
3446 	trace_seq_init(&iter->seq);
3447 
3448 	/* copy the tracer to avoid using a global lock all around */
3449 	mutex_lock(&trace_types_lock);
3450 	if (unlikely(old_tracer != current_trace && current_trace)) {
3451 		old_tracer = current_trace;
3452 		*iter->trace = *current_trace;
3453 	}
3454 	mutex_unlock(&trace_types_lock);
3455 
3456 	/*
3457 	 * Avoid more than one consumer on a single file descriptor.
3458 	 * This is just a matter of trace coherency; the ring buffer itself
3459 	 * is protected.
3460 	 */
3461 	mutex_lock(&iter->mutex);
3462 	if (iter->trace->read) {
3463 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
3464 		if (sret)
3465 			goto out;
3466 	}
3467 
3468 waitagain:
3469 	sret = tracing_wait_pipe(filp);
3470 	if (sret <= 0)
3471 		goto out;
3472 
3473 	/* stop when tracing is finished */
3474 	if (trace_empty(iter)) {
3475 		sret = 0;
3476 		goto out;
3477 	}
3478 
3479 	if (cnt >= PAGE_SIZE)
3480 		cnt = PAGE_SIZE - 1;
3481 
3482 	/* reset all but tr, trace, and overruns */
3483 	memset(&iter->seq, 0,
3484 	       sizeof(struct trace_iterator) -
3485 	       offsetof(struct trace_iterator, seq));
3486 	cpumask_clear(iter->started);
3487 	iter->pos = -1;
3488 
3489 	trace_event_read_lock();
3490 	trace_access_lock(iter->cpu_file);
3491 	while (trace_find_next_entry_inc(iter) != NULL) {
3492 		enum print_line_t ret;
3493 		int len = iter->seq.len;
3494 
3495 		ret = print_trace_line(iter);
3496 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3497 			/* don't print partial lines */
3498 			iter->seq.len = len;
3499 			break;
3500 		}
3501 		if (ret != TRACE_TYPE_NO_CONSUME)
3502 			trace_consume(iter);
3503 
3504 		if (iter->seq.len >= cnt)
3505 			break;
3506 
3507 		/*
3508 		 * If the full flag is set, we reached the trace_seq buffer
3509 		 * size and should have left via the partial output condition
3510 		 * above; one of the trace_seq_* functions is not used properly.
3511 		 */
3512 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
3513 			  iter->ent->type);
3514 	}
3515 	trace_access_unlock(iter->cpu_file);
3516 	trace_event_read_unlock();
3517 
3518 	/* Now copy what we have to the user */
3519 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3520 	if (iter->seq.readpos >= iter->seq.len)
3521 		trace_seq_init(&iter->seq);
3522 
3523 	/*
3524 	 * If there was nothing to send to user, in spite of consuming trace
3525 	 * entries, go back to wait for more entries.
3526 	 */
3527 	if (sret == -EBUSY)
3528 		goto waitagain;
3529 
3530 out:
3531 	mutex_unlock(&iter->mutex);
3532 
3533 	return sret;
3534 }
3535 
3536 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
3537 				     struct pipe_buffer *buf)
3538 {
3539 	__free_page(buf->page);
3540 }
3541 
3542 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3543 				     unsigned int idx)
3544 {
3545 	__free_page(spd->pages[idx]);
3546 }
3547 
3548 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3549 	.can_merge		= 0,
3550 	.map			= generic_pipe_buf_map,
3551 	.unmap			= generic_pipe_buf_unmap,
3552 	.confirm		= generic_pipe_buf_confirm,
3553 	.release		= tracing_pipe_buf_release,
3554 	.steal			= generic_pipe_buf_steal,
3555 	.get			= generic_pipe_buf_get,
3556 };
3557 
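/*
 * Fill iter->seq with as many complete trace lines as fit into the
 * page-sized seq buffer (and into the remaining splice length),
 * consuming the entries as they are formatted. Returns how much of
 * @rem is left.
 */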
3558 static size_t
3559 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3560 {
3561 	size_t count;
3562 	int ret;
3563 
3564 	/* Seq buffer is page-sized, exactly what we need. */
3565 	for (;;) {
3566 		count = iter->seq.len;
3567 		ret = print_trace_line(iter);
3568 		count = iter->seq.len - count;
3569 		if (rem < count) {
3570 			rem = 0;
3571 			iter->seq.len -= count;
3572 			break;
3573 		}
3574 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3575 			iter->seq.len -= count;
3576 			break;
3577 		}
3578 
3579 		if (ret != TRACE_TYPE_NO_CONSUME)
3580 			trace_consume(iter);
3581 		rem -= count;
3582 		if (!trace_find_next_entry_inc(iter))	{
3583 			rem = 0;
3584 			iter->ent = NULL;
3585 			break;
3586 		}
3587 	}
3588 
3589 	return rem;
3590 }
3591 
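/*
 * splice() support for trace_pipe: format trace lines into freshly
 * allocated pages and hand them to the pipe with splice_to_pipe(),
 * one page-sized trace_seq at a time.
 */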
3592 static ssize_t tracing_splice_read_pipe(struct file *filp,
3593 					loff_t *ppos,
3594 					struct pipe_inode_info *pipe,
3595 					size_t len,
3596 					unsigned int flags)
3597 {
3598 	struct page *pages_def[PIPE_DEF_BUFFERS];
3599 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
3600 	struct trace_iterator *iter = filp->private_data;
3601 	struct splice_pipe_desc spd = {
3602 		.pages		= pages_def,
3603 		.partial	= partial_def,
3604 		.nr_pages	= 0, /* This gets updated below. */
3605 		.nr_pages_max	= PIPE_DEF_BUFFERS,
3606 		.flags		= flags,
3607 		.ops		= &tracing_pipe_buf_ops,
3608 		.spd_release	= tracing_spd_release_pipe,
3609 	};
3610 	static struct tracer *old_tracer;
3611 	ssize_t ret;
3612 	size_t rem;
3613 	unsigned int i;
3614 
3615 	if (splice_grow_spd(pipe, &spd))
3616 		return -ENOMEM;
3617 
3618 	/* copy the tracer to avoid using a global lock all around */
3619 	mutex_lock(&trace_types_lock);
3620 	if (unlikely(old_tracer != current_trace && current_trace)) {
3621 		old_tracer = current_trace;
3622 		*iter->trace = *current_trace;
3623 	}
3624 	mutex_unlock(&trace_types_lock);
3625 
3626 	mutex_lock(&iter->mutex);
3627 
3628 	if (iter->trace->splice_read) {
3629 		ret = iter->trace->splice_read(iter, filp,
3630 					       ppos, pipe, len, flags);
3631 		if (ret)
3632 			goto out_err;
3633 	}
3634 
3635 	ret = tracing_wait_pipe(filp);
3636 	if (ret <= 0)
3637 		goto out_err;
3638 
3639 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
3640 		ret = -EFAULT;
3641 		goto out_err;
3642 	}
3643 
3644 	trace_event_read_lock();
3645 	trace_access_lock(iter->cpu_file);
3646 
3647 	/* Fill as many pages as possible. */
3648 	for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3649 		spd.pages[i] = alloc_page(GFP_KERNEL);
3650 		if (!spd.pages[i])
3651 			break;
3652 
3653 		rem = tracing_fill_pipe_page(rem, iter);
3654 
3655 		/* Copy the data into the page, so we can start over. */
3656 		ret = trace_seq_to_buffer(&iter->seq,
3657 					  page_address(spd.pages[i]),
3658 					  iter->seq.len);
3659 		if (ret < 0) {
3660 			__free_page(spd.pages[i]);
3661 			break;
3662 		}
3663 		spd.partial[i].offset = 0;
3664 		spd.partial[i].len = iter->seq.len;
3665 
3666 		trace_seq_init(&iter->seq);
3667 	}
3668 
3669 	trace_access_unlock(iter->cpu_file);
3670 	trace_event_read_unlock();
3671 	mutex_unlock(&iter->mutex);
3672 
3673 	spd.nr_pages = i;
3674 
3675 	ret = splice_to_pipe(pipe, &spd);
3676 out:
3677 	splice_shrink_spd(&spd);
3678 	return ret;
3679 
3680 out_err:
3681 	mutex_unlock(&iter->mutex);
3682 	goto out;
3683 }
3684 
3685 static ssize_t
3686 tracing_entries_read(struct file *filp, char __user *ubuf,
3687 		     size_t cnt, loff_t *ppos)
3688 {
3689 	struct trace_array *tr = filp->private_data;
3690 	char buf[96];
3691 	int r;
3692 
3693 	mutex_lock(&trace_types_lock);
3694 	if (!ring_buffer_expanded)
3695 		r = sprintf(buf, "%lu (expanded: %lu)\n",
3696 			    tr->entries >> 10,
3697 			    trace_buf_size >> 10);
3698 	else
3699 		r = sprintf(buf, "%lu\n", tr->entries >> 10);
3700 	mutex_unlock(&trace_types_lock);
3701 
3702 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3703 }
3704 
3705 static ssize_t
3706 tracing_entries_write(struct file *filp, const char __user *ubuf,
3707 		      size_t cnt, loff_t *ppos)
3708 {
3709 	unsigned long val;
3710 	int ret;
3711 
3712 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3713 	if (ret)
3714 		return ret;
3715 
3716 	/* must have at least 1 entry */
3717 	if (!val)
3718 		return -EINVAL;
3719 
3720 	/* value is in KB */
3721 	val <<= 10;
3722 
3723 	ret = tracing_resize_ring_buffer(val);
3724 	if (ret < 0)
3725 		return ret;
3726 
3727 	*ppos += cnt;
3728 
3729 	return cnt;
3730 }
3731 
3732 static ssize_t
3733 tracing_total_entries_read(struct file *filp, char __user *ubuf,
3734 				size_t cnt, loff_t *ppos)
3735 {
3736 	struct trace_array *tr = filp->private_data;
3737 	char buf[64];
3738 	int r, cpu;
3739 	unsigned long size = 0, expanded_size = 0;
3740 
3741 	mutex_lock(&trace_types_lock);
3742 	for_each_tracing_cpu(cpu) {
3743 		size += tr->entries >> 10;
3744 		if (!ring_buffer_expanded)
3745 			expanded_size += trace_buf_size >> 10;
3746 	}
3747 	if (ring_buffer_expanded)
3748 		r = sprintf(buf, "%lu\n", size);
3749 	else
3750 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
3751 	mutex_unlock(&trace_types_lock);
3752 
3753 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3754 }
3755 
3756 static ssize_t
3757 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
3758 			  size_t cnt, loff_t *ppos)
3759 {
3760 	/*
3761 	 * There is no need to read what the user has written; this function
3762 	 * exists only so that using "echo" on this file does not return an error.
3763 	 */
3764 
3765 	*ppos += cnt;
3766 
3767 	return cnt;
3768 }
3769 
3770 static int
3771 tracing_free_buffer_release(struct inode *inode, struct file *filp)
3772 {
3773 	/* disable tracing ? */
3774 	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
3775 		tracing_off();
3776 	/* resize the ring buffer to 0 */
3777 	tracing_resize_ring_buffer(0);
3778 
3779 	return 0;
3780 }
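
/*
 * Illustrative only: free_buffer is write-only and the interesting work is
 * done at release time, so a sketch of the expected usage (path assumes the
 * usual debugfs mount; standard includes omitted) is open, write, close:
 *
 *	int fd = open("/sys/kernel/debug/tracing/free_buffer", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 *
 * On the final close(), the ring buffer is resized to zero and, if the
 * TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off as well.
 */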
3781 
3782 static ssize_t
3783 tracing_mark_write(struct file *filp, const char __user *ubuf,
3784 					size_t cnt, loff_t *fpos)
3785 {
3786 	unsigned long addr = (unsigned long)ubuf;
3787 	struct ring_buffer_event *event;
3788 	struct ring_buffer *buffer;
3789 	struct print_entry *entry;
3790 	unsigned long irq_flags;
3791 	struct page *pages[2];
3792 	int nr_pages = 1;
3793 	ssize_t written;
3794 	void *page1;
3795 	void *page2;
3796 	int offset;
3797 	int size;
3798 	int len;
3799 	int ret;
3800 
3801 	if (tracing_disabled)
3802 		return -EINVAL;
3803 
3804 	if (cnt > TRACE_BUF_SIZE)
3805 		cnt = TRACE_BUF_SIZE;
3806 
3807 	/*
3808 	 * Userspace is injecting traces into the kernel trace buffer.
3809 	 * We want to be as non-intrusive as possible.
3810 	 * To do so, we do not want to allocate any special buffers
3811 	 * or take any locks, but instead write the userspace data
3812 	 * straight into the ring buffer.
3813 	 *
3814 	 * First we need to pin the userspace buffer into memory.
3815 	 * It most likely already is resident, since userspace just
3816 	 * referenced it, but there is no guarantee. By using get_user_pages_fast()
3817 	 * and kmap_atomic/kunmap_atomic() we can get access to the
3818 	 * pages directly. We then write the data directly into the
3819 	 * ring buffer.
3820 	 */
3821 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
3822 
3823 	/* check if we cross pages */
3824 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
3825 		nr_pages = 2;
3826 
3827 	offset = addr & (PAGE_SIZE - 1);
3828 	addr &= PAGE_MASK;
3829 
3830 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
3831 	if (ret < nr_pages) {
3832 		while (--ret >= 0)
3833 			put_page(pages[ret]);
3834 		written = -EFAULT;
3835 		goto out;
3836 	}
3837 
3838 	page1 = kmap_atomic(pages[0]);
3839 	if (nr_pages == 2)
3840 		page2 = kmap_atomic(pages[1]);
3841 
3842 	local_save_flags(irq_flags);
3843 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
3844 	buffer = global_trace.buffer;
3845 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3846 					  irq_flags, preempt_count());
3847 	if (!event) {
3848 		/* Ring buffer disabled, return as if not open for write */
3849 		written = -EBADF;
3850 		goto out_unlock;
3851 	}
3852 
3853 	entry = ring_buffer_event_data(event);
3854 	entry->ip = _THIS_IP_;
3855 
3856 	if (nr_pages == 2) {
3857 		len = PAGE_SIZE - offset;
3858 		memcpy(&entry->buf, page1 + offset, len);
3859 		memcpy(&entry->buf[len], page2, cnt - len);
3860 	} else
3861 		memcpy(&entry->buf, page1 + offset, cnt);
3862 
3863 	if (entry->buf[cnt - 1] != '\n') {
3864 		entry->buf[cnt] = '\n';
3865 		entry->buf[cnt + 1] = '\0';
3866 	} else
3867 		entry->buf[cnt] = '\0';
3868 
3869 	ring_buffer_unlock_commit(buffer, event);
3870 
3871 	written = cnt;
3872 
3873 	*fpos += written;
3874 
3875  out_unlock:
3876 	if (nr_pages == 2)
3877 		kunmap_atomic(page2);
3878 	kunmap_atomic(page1);
3879 	while (nr_pages > 0)
3880 		put_page(pages[--nr_pages]);
3881  out:
3882 	return written;
3883 }
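
/*
 * Illustrative only: a minimal userspace sketch of injecting a marker into
 * the trace through trace_marker (path assumes the usual debugfs mount;
 * standard includes omitted). tracing_mark_write() above appends a '\n'
 * if the string lacks one.
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "hello from userspace", 20);
 *		close(fd);
 *	}
 */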
3884 
3885 static int tracing_clock_show(struct seq_file *m, void *v)
3886 {
3887 	int i;
3888 
3889 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3890 		seq_printf(m,
3891 			"%s%s%s%s", i ? " " : "",
3892 			i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3893 			i == trace_clock_id ? "]" : "");
3894 	seq_putc(m, '\n');
3895 
3896 	return 0;
3897 }
3898 
3899 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3900 				   size_t cnt, loff_t *fpos)
3901 {
3902 	char buf[64];
3903 	const char *clockstr;
3904 	int i;
3905 
3906 	if (cnt >= sizeof(buf))
3907 		return -EINVAL;
3908 
3909 	if (copy_from_user(&buf, ubuf, cnt))
3910 		return -EFAULT;
3911 
3912 	buf[cnt] = 0;
3913 
3914 	clockstr = strstrip(buf);
3915 
3916 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3917 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
3918 			break;
3919 	}
3920 	if (i == ARRAY_SIZE(trace_clocks))
3921 		return -EINVAL;
3922 
3923 	trace_clock_id = i;
3924 
3925 	mutex_lock(&trace_types_lock);
3926 
3927 	ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3928 	if (max_tr.buffer)
3929 		ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3930 
3931 	mutex_unlock(&trace_types_lock);
3932 
3933 	*fpos += cnt;
3934 
3935 	return cnt;
3936 }
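
/*
 * Illustrative only: a sketch of switching the trace clock from userspace
 * (path assumes the usual debugfs mount; standard includes omitted). Reading
 * the file shows the available clocks with the current one in brackets;
 * writing one of the listed names, e.g. "global", selects it via
 * tracing_clock_write() above.
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_clock", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "global", 6);
 *		close(fd);
 *	}
 */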
3937 
3938 static int tracing_clock_open(struct inode *inode, struct file *file)
3939 {
3940 	if (tracing_disabled)
3941 		return -ENODEV;
3942 	return single_open(file, tracing_clock_show, NULL);
3943 }
3944 
3945 static const struct file_operations tracing_max_lat_fops = {
3946 	.open		= tracing_open_generic,
3947 	.read		= tracing_max_lat_read,
3948 	.write		= tracing_max_lat_write,
3949 	.llseek		= generic_file_llseek,
3950 };
3951 
3952 static const struct file_operations tracing_ctrl_fops = {
3953 	.open		= tracing_open_generic,
3954 	.read		= tracing_ctrl_read,
3955 	.write		= tracing_ctrl_write,
3956 	.llseek		= generic_file_llseek,
3957 };
3958 
3959 static const struct file_operations set_tracer_fops = {
3960 	.open		= tracing_open_generic,
3961 	.read		= tracing_set_trace_read,
3962 	.write		= tracing_set_trace_write,
3963 	.llseek		= generic_file_llseek,
3964 };
3965 
3966 static const struct file_operations tracing_pipe_fops = {
3967 	.open		= tracing_open_pipe,
3968 	.poll		= tracing_poll_pipe,
3969 	.read		= tracing_read_pipe,
3970 	.splice_read	= tracing_splice_read_pipe,
3971 	.release	= tracing_release_pipe,
3972 	.llseek		= no_llseek,
3973 };
3974 
3975 static const struct file_operations tracing_entries_fops = {
3976 	.open		= tracing_open_generic,
3977 	.read		= tracing_entries_read,
3978 	.write		= tracing_entries_write,
3979 	.llseek		= generic_file_llseek,
3980 };
3981 
3982 static const struct file_operations tracing_total_entries_fops = {
3983 	.open		= tracing_open_generic,
3984 	.read		= tracing_total_entries_read,
3985 	.llseek		= generic_file_llseek,
3986 };
3987 
3988 static const struct file_operations tracing_free_buffer_fops = {
3989 	.write		= tracing_free_buffer_write,
3990 	.release	= tracing_free_buffer_release,
3991 };
3992 
3993 static const struct file_operations tracing_mark_fops = {
3994 	.open		= tracing_open_generic,
3995 	.write		= tracing_mark_write,
3996 	.llseek		= generic_file_llseek,
3997 };
3998 
3999 static const struct file_operations trace_clock_fops = {
4000 	.open		= tracing_clock_open,
4001 	.read		= seq_read,
4002 	.llseek		= seq_lseek,
4003 	.release	= single_release,
4004 	.write		= tracing_clock_write,
4005 };
4006 
4007 struct ftrace_buffer_info {
4008 	struct trace_array	*tr;
4009 	void			*spare;
4010 	int			cpu;
4011 	unsigned int		read;
4012 };
4013 
4014 static int tracing_buffers_open(struct inode *inode, struct file *filp)
4015 {
4016 	int cpu = (int)(long)inode->i_private;
4017 	struct ftrace_buffer_info *info;
4018 
4019 	if (tracing_disabled)
4020 		return -ENODEV;
4021 
4022 	info = kzalloc(sizeof(*info), GFP_KERNEL);
4023 	if (!info)
4024 		return -ENOMEM;
4025 
4026 	info->tr	= &global_trace;
4027 	info->cpu	= cpu;
4028 	info->spare	= NULL;
4029 	/* Force reading ring buffer for first read */
4030 	info->read	= (unsigned int)-1;
4031 
4032 	filp->private_data = info;
4033 
4034 	return nonseekable_open(inode, filp);
4035 }
4036 
4037 static ssize_t
4038 tracing_buffers_read(struct file *filp, char __user *ubuf,
4039 		     size_t count, loff_t *ppos)
4040 {
4041 	struct ftrace_buffer_info *info = filp->private_data;
4042 	ssize_t ret;
4043 	size_t size;
4044 
4045 	if (!count)
4046 		return 0;
4047 
4048 	if (!info->spare)
4049 		info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
4050 	if (!info->spare)
4051 		return -ENOMEM;
4052 
4053 	/* Do we have previous read data to read? */
4054 	if (info->read < PAGE_SIZE)
4055 		goto read;
4056 
4057 	trace_access_lock(info->cpu);
4058 	ret = ring_buffer_read_page(info->tr->buffer,
4059 				    &info->spare,
4060 				    count,
4061 				    info->cpu, 0);
4062 	trace_access_unlock(info->cpu);
4063 	if (ret < 0)
4064 		return 0;
4065 
4066 	info->read = 0;
4067 
4068 read:
4069 	size = PAGE_SIZE - info->read;
4070 	if (size > count)
4071 		size = count;
4072 
4073 	ret = copy_to_user(ubuf, info->spare + info->read, size);
4074 	if (ret == size)
4075 		return -EFAULT;
4076 	size -= ret;
4077 
4078 	*ppos += size;
4079 	info->read += size;
4080 
4081 	return size;
4082 }
4083 
4084 static int tracing_buffers_release(struct inode *inode, struct file *file)
4085 {
4086 	struct ftrace_buffer_info *info = file->private_data;
4087 
4088 	if (info->spare)
4089 		ring_buffer_free_read_page(info->tr->buffer, info->spare);
4090 	kfree(info);
4091 
4092 	return 0;
4093 }
4094 
4095 struct buffer_ref {
4096 	struct ring_buffer	*buffer;
4097 	void			*page;
4098 	int			ref;
4099 };
4100 
4101 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
4102 				    struct pipe_buffer *buf)
4103 {
4104 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
4105 
4106 	if (--ref->ref)
4107 		return;
4108 
4109 	ring_buffer_free_read_page(ref->buffer, ref->page);
4110 	kfree(ref);
4111 	buf->private = 0;
4112 }
4113 
4114 static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
4115 				 struct pipe_buffer *buf)
4116 {
4117 	return 1;
4118 }
4119 
4120 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
4121 				struct pipe_buffer *buf)
4122 {
4123 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
4124 
4125 	ref->ref++;
4126 }
4127 
4128 /* Pipe buffer operations for a buffer. */
4129 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
4130 	.can_merge		= 0,
4131 	.map			= generic_pipe_buf_map,
4132 	.unmap			= generic_pipe_buf_unmap,
4133 	.confirm		= generic_pipe_buf_confirm,
4134 	.release		= buffer_pipe_buf_release,
4135 	.steal			= buffer_pipe_buf_steal,
4136 	.get			= buffer_pipe_buf_get,
4137 };
4138 
4139 /*
4140  * Callback from splice_to_pipe(): release any pages left over in the
4141  * spd if we errored out while filling the pipe.
4142  */
4143 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
4144 {
4145 	struct buffer_ref *ref =
4146 		(struct buffer_ref *)spd->partial[i].private;
4147 
4148 	if (--ref->ref)
4149 		return;
4150 
4151 	ring_buffer_free_read_page(ref->buffer, ref->page);
4152 	kfree(ref);
4153 	spd->partial[i].private = 0;
4154 }
4155 
4156 static ssize_t
4157 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4158 			    struct pipe_inode_info *pipe, size_t len,
4159 			    unsigned int flags)
4160 {
4161 	struct ftrace_buffer_info *info = file->private_data;
4162 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
4163 	struct page *pages_def[PIPE_DEF_BUFFERS];
4164 	struct splice_pipe_desc spd = {
4165 		.pages		= pages_def,
4166 		.partial	= partial_def,
4167 		.nr_pages_max	= PIPE_DEF_BUFFERS,
4168 		.flags		= flags,
4169 		.ops		= &buffer_pipe_buf_ops,
4170 		.spd_release	= buffer_spd_release,
4171 	};
4172 	struct buffer_ref *ref;
4173 	int entries, size, i;
4174 	size_t ret;
4175 
4176 	if (splice_grow_spd(pipe, &spd))
4177 		return -ENOMEM;
4178 
4179 	if (*ppos & (PAGE_SIZE - 1)) {
4180 		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
4181 		ret = -EINVAL;
4182 		goto out;
4183 	}
4184 
4185 	if (len & (PAGE_SIZE - 1)) {
4186 		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
4187 		if (len < PAGE_SIZE) {
4188 			ret = -EINVAL;
4189 			goto out;
4190 		}
4191 		len &= PAGE_MASK;
4192 	}
4193 
4194 	trace_access_lock(info->cpu);
4195 	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
4196 
4197 	for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
4198 		struct page *page;
4199 		int r;
4200 
4201 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
4202 		if (!ref)
4203 			break;
4204 
4205 		ref->ref = 1;
4206 		ref->buffer = info->tr->buffer;
4207 		ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
4208 		if (!ref->page) {
4209 			kfree(ref);
4210 			break;
4211 		}
4212 
4213 		r = ring_buffer_read_page(ref->buffer, &ref->page,
4214 					  len, info->cpu, 1);
4215 		if (r < 0) {
4216 			ring_buffer_free_read_page(ref->buffer, ref->page);
4217 			kfree(ref);
4218 			break;
4219 		}
4220 
4221 		/*
4222 		 * Zero out any leftover data; this page is going
4223 		 * to user land.
4224 		 */
4225 		size = ring_buffer_page_len(ref->page);
4226 		if (size < PAGE_SIZE)
4227 			memset(ref->page + size, 0, PAGE_SIZE - size);
4228 
4229 		page = virt_to_page(ref->page);
4230 
4231 		spd.pages[i] = page;
4232 		spd.partial[i].len = PAGE_SIZE;
4233 		spd.partial[i].offset = 0;
4234 		spd.partial[i].private = (unsigned long)ref;
4235 		spd.nr_pages++;
4236 		*ppos += PAGE_SIZE;
4237 
4238 		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
4239 	}
4240 
4241 	trace_access_unlock(info->cpu);
4242 	spd.nr_pages = i;
4243 
4244 	/* did we read anything? */
4245 	if (!spd.nr_pages) {
4246 		if (flags & SPLICE_F_NONBLOCK)
4247 			ret = -EAGAIN;
4248 		else
4249 			ret = 0;
4250 		/* TODO: block */
4251 		goto out;
4252 	}
4253 
4254 	ret = splice_to_pipe(pipe, &spd);
4255 	splice_shrink_spd(&spd);
4256 out:
4257 	return ret;
4258 }
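
/*
 * Illustrative only: a userspace sketch of pulling raw, page-sized chunks out
 * of a single CPU buffer via trace_pipe_raw (the path and the 4096-byte page
 * size are assumptions). Lengths must be page aligned, as enforced above in
 * tracing_buffers_splice_read(); error handling is omitted for brevity.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			       O_RDONLY);
 *		int out = open("cpu0.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *		int pfd[2];
 *		ssize_t n;
 *
 *		pipe(pfd);
 *		while ((n = splice(raw, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK)) > 0)
 *			splice(pfd[0], NULL, out, NULL, n, 0);
 *		return 0;
 *	}
 */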
4259 
4260 static const struct file_operations tracing_buffers_fops = {
4261 	.open		= tracing_buffers_open,
4262 	.read		= tracing_buffers_read,
4263 	.release	= tracing_buffers_release,
4264 	.splice_read	= tracing_buffers_splice_read,
4265 	.llseek		= no_llseek,
4266 };
4267 
4268 static ssize_t
4269 tracing_stats_read(struct file *filp, char __user *ubuf,
4270 		   size_t count, loff_t *ppos)
4271 {
4272 	unsigned long cpu = (unsigned long)filp->private_data;
4273 	struct trace_array *tr = &global_trace;
4274 	struct trace_seq *s;
4275 	unsigned long cnt;
4276 	unsigned long long t;
4277 	unsigned long usec_rem;
4278 
4279 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4280 	if (!s)
4281 		return -ENOMEM;
4282 
4283 	trace_seq_init(s);
4284 
4285 	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
4286 	trace_seq_printf(s, "entries: %ld\n", cnt);
4287 
4288 	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
4289 	trace_seq_printf(s, "overrun: %ld\n", cnt);
4290 
4291 	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
4292 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
4293 
4294 	cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
4295 	trace_seq_printf(s, "bytes: %ld\n", cnt);
4296 
4297 	t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
4298 	usec_rem = do_div(t, USEC_PER_SEC);
4299 	trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
4300 
4301 	t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
4302 	usec_rem = do_div(t, USEC_PER_SEC);
4303 	trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
4304 
4305 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
4306 
4307 	kfree(s);
4308 
4309 	return count;
4310 }
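
/*
 * Illustrative only: the per_cpu/cpuN/stats file produced above is plain
 * text, one "name: value" pair per line (entries, overrun, commit overrun,
 * bytes, oldest event ts, now ts), so a simple read() is enough to consume
 * it (path assumes the usual debugfs mount; standard includes omitted):
 *
 *	char buf[256];
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/stats", O_RDONLY);
 *	ssize_t n = read(fd, buf, sizeof(buf) - 1);
 *	if (n > 0)
 *		buf[n] = '\0';
 */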
4311 
4312 static const struct file_operations tracing_stats_fops = {
4313 	.open		= tracing_open_generic,
4314 	.read		= tracing_stats_read,
4315 	.llseek		= generic_file_llseek,
4316 };
4317 
4318 #ifdef CONFIG_DYNAMIC_FTRACE
4319 
4320 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
4321 {
4322 	return 0;
4323 }
4324 
4325 static ssize_t
4326 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
4327 		  size_t cnt, loff_t *ppos)
4328 {
4329 	static char ftrace_dyn_info_buffer[1024];
4330 	static DEFINE_MUTEX(dyn_info_mutex);
4331 	unsigned long *p = filp->private_data;
4332 	char *buf = ftrace_dyn_info_buffer;
4333 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
4334 	int r;
4335 
4336 	mutex_lock(&dyn_info_mutex);
4337 	r = sprintf(buf, "%ld ", *p);
4338 
4339 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
4340 	buf[r++] = '\n';
4341 
4342 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4343 
4344 	mutex_unlock(&dyn_info_mutex);
4345 
4346 	return r;
4347 }
4348 
4349 static const struct file_operations tracing_dyn_info_fops = {
4350 	.open		= tracing_open_generic,
4351 	.read		= tracing_read_dyn_info,
4352 	.llseek		= generic_file_llseek,
4353 };
4354 #endif
4355 
4356 static struct dentry *d_tracer;
4357 
4358 struct dentry *tracing_init_dentry(void)
4359 {
4360 	static int once;
4361 
4362 	if (d_tracer)
4363 		return d_tracer;
4364 
4365 	if (!debugfs_initialized())
4366 		return NULL;
4367 
4368 	d_tracer = debugfs_create_dir("tracing", NULL);
4369 
4370 	if (!d_tracer && !once) {
4371 		once = 1;
4372 		pr_warning("Could not create debugfs directory 'tracing'\n");
4373 		return NULL;
4374 	}
4375 
4376 	return d_tracer;
4377 }
4378 
4379 static struct dentry *d_percpu;
4380 
4381 struct dentry *tracing_dentry_percpu(void)
4382 {
4383 	static int once;
4384 	struct dentry *d_tracer;
4385 
4386 	if (d_percpu)
4387 		return d_percpu;
4388 
4389 	d_tracer = tracing_init_dentry();
4390 
4391 	if (!d_tracer)
4392 		return NULL;
4393 
4394 	d_percpu = debugfs_create_dir("per_cpu", d_tracer);
4395 
4396 	if (!d_percpu && !once) {
4397 		once = 1;
4398 		pr_warning("Could not create debugfs directory 'per_cpu'\n");
4399 		return NULL;
4400 	}
4401 
4402 	return d_percpu;
4403 }
4404 
4405 static void tracing_init_debugfs_percpu(long cpu)
4406 {
4407 	struct dentry *d_percpu = tracing_dentry_percpu();
4408 	struct dentry *d_cpu;
4409 	char cpu_dir[30]; /* 30 characters should be more than enough */
4410 
4411 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
4412 	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4413 	if (!d_cpu) {
4414 		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
4415 		return;
4416 	}
4417 
4418 	/* per cpu trace_pipe */
4419 	trace_create_file("trace_pipe", 0444, d_cpu,
4420 			(void *) cpu, &tracing_pipe_fops);
4421 
4422 	/* per cpu trace */
4423 	trace_create_file("trace", 0644, d_cpu,
4424 			(void *) cpu, &tracing_fops);
4425 
4426 	trace_create_file("trace_pipe_raw", 0444, d_cpu,
4427 			(void *) cpu, &tracing_buffers_fops);
4428 
4429 	trace_create_file("stats", 0444, d_cpu,
4430 			(void *) cpu, &tracing_stats_fops);
4431 }
4432 
4433 #ifdef CONFIG_FTRACE_SELFTEST
4434 /* Let selftest have access to static functions in this file */
4435 #include "trace_selftest.c"
4436 #endif
4437 
4438 struct trace_option_dentry {
4439 	struct tracer_opt		*opt;
4440 	struct tracer_flags		*flags;
4441 	struct dentry			*entry;
4442 };
4443 
4444 static ssize_t
4445 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
4446 			loff_t *ppos)
4447 {
4448 	struct trace_option_dentry *topt = filp->private_data;
4449 	char *buf;
4450 
4451 	if (topt->flags->val & topt->opt->bit)
4452 		buf = "1\n";
4453 	else
4454 		buf = "0\n";
4455 
4456 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
4457 }
4458 
4459 static ssize_t
4460 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
4461 			 loff_t *ppos)
4462 {
4463 	struct trace_option_dentry *topt = filp->private_data;
4464 	unsigned long val;
4465 	int ret;
4466 
4467 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4468 	if (ret)
4469 		return ret;
4470 
4471 	if (val != 0 && val != 1)
4472 		return -EINVAL;
4473 
4474 	if (!!(topt->flags->val & topt->opt->bit) != val) {
4475 		mutex_lock(&trace_types_lock);
4476 		ret = __set_tracer_option(current_trace, topt->flags,
4477 					  topt->opt, !val);
4478 		mutex_unlock(&trace_types_lock);
4479 		if (ret)
4480 			return ret;
4481 	}
4482 
4483 	*ppos += cnt;
4484 
4485 	return cnt;
4486 }
4487 
4488 
4489 static const struct file_operations trace_options_fops = {
4490 	.open = tracing_open_generic,
4491 	.read = trace_options_read,
4492 	.write = trace_options_write,
4493 	.llseek	= generic_file_llseek,
4494 };
4495 
4496 static ssize_t
4497 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
4498 			loff_t *ppos)
4499 {
4500 	long index = (long)filp->private_data;
4501 	char *buf;
4502 
4503 	if (trace_flags & (1 << index))
4504 		buf = "1\n";
4505 	else
4506 		buf = "0\n";
4507 
4508 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
4509 }
4510 
4511 static ssize_t
4512 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4513 			 loff_t *ppos)
4514 {
4515 	long index = (long)filp->private_data;
4516 	unsigned long val;
4517 	int ret;
4518 
4519 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4520 	if (ret)
4521 		return ret;
4522 
4523 	if (val != 0 && val != 1)
4524 		return -EINVAL;
4525 
4526 	mutex_lock(&trace_types_lock);
4527 	ret = set_tracer_flag(1 << index, val);
4528 	mutex_unlock(&trace_types_lock);
4529 
4530 	if (ret < 0)
4531 		return ret;
4532 
4533 	*ppos += cnt;
4534 
4535 	return cnt;
4536 }
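
/*
 * Illustrative only: both the per-tracer option files and the core option
 * files created below accept "0" or "1". A sketch of toggling one of the
 * core options from userspace (the option name and path are examples;
 * standard includes omitted):
 *
 *	int fd = open("/sys/kernel/debug/tracing/options/block", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */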
4537 
4538 static const struct file_operations trace_options_core_fops = {
4539 	.open = tracing_open_generic,
4540 	.read = trace_options_core_read,
4541 	.write = trace_options_core_write,
4542 	.llseek = generic_file_llseek,
4543 };
4544 
4545 struct dentry *trace_create_file(const char *name,
4546 				 umode_t mode,
4547 				 struct dentry *parent,
4548 				 void *data,
4549 				 const struct file_operations *fops)
4550 {
4551 	struct dentry *ret;
4552 
4553 	ret = debugfs_create_file(name, mode, parent, data, fops);
4554 	if (!ret)
4555 		pr_warning("Could not create debugfs '%s' entry\n", name);
4556 
4557 	return ret;
4558 }
4559 
4560 
4561 static struct dentry *trace_options_init_dentry(void)
4562 {
4563 	struct dentry *d_tracer;
4564 	static struct dentry *t_options;
4565 
4566 	if (t_options)
4567 		return t_options;
4568 
4569 	d_tracer = tracing_init_dentry();
4570 	if (!d_tracer)
4571 		return NULL;
4572 
4573 	t_options = debugfs_create_dir("options", d_tracer);
4574 	if (!t_options) {
4575 		pr_warning("Could not create debugfs directory 'options'\n");
4576 		return NULL;
4577 	}
4578 
4579 	return t_options;
4580 }
4581 
4582 static void
4583 create_trace_option_file(struct trace_option_dentry *topt,
4584 			 struct tracer_flags *flags,
4585 			 struct tracer_opt *opt)
4586 {
4587 	struct dentry *t_options;
4588 
4589 	t_options = trace_options_init_dentry();
4590 	if (!t_options)
4591 		return;
4592 
4593 	topt->flags = flags;
4594 	topt->opt = opt;
4595 
4596 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
4597 				    &trace_options_fops);
4598 
4599 }
4600 
4601 static struct trace_option_dentry *
4602 create_trace_option_files(struct tracer *tracer)
4603 {
4604 	struct trace_option_dentry *topts;
4605 	struct tracer_flags *flags;
4606 	struct tracer_opt *opts;
4607 	int cnt;
4608 
4609 	if (!tracer)
4610 		return NULL;
4611 
4612 	flags = tracer->flags;
4613 
4614 	if (!flags || !flags->opts)
4615 		return NULL;
4616 
4617 	opts = flags->opts;
4618 
4619 	for (cnt = 0; opts[cnt].name; cnt++)
4620 		;
4621 
4622 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
4623 	if (!topts)
4624 		return NULL;
4625 
4626 	for (cnt = 0; opts[cnt].name; cnt++)
4627 		create_trace_option_file(&topts[cnt], flags,
4628 					 &opts[cnt]);
4629 
4630 	return topts;
4631 }
4632 
4633 static void
4634 destroy_trace_option_files(struct trace_option_dentry *topts)
4635 {
4636 	int cnt;
4637 
4638 	if (!topts)
4639 		return;
4640 
4641 	for (cnt = 0; topts[cnt].opt; cnt++) {
4642 		if (topts[cnt].entry)
4643 			debugfs_remove(topts[cnt].entry);
4644 	}
4645 
4646 	kfree(topts);
4647 }
4648 
4649 static struct dentry *
4650 create_trace_option_core_file(const char *option, long index)
4651 {
4652 	struct dentry *t_options;
4653 
4654 	t_options = trace_options_init_dentry();
4655 	if (!t_options)
4656 		return NULL;
4657 
4658 	return trace_create_file(option, 0644, t_options, (void *)index,
4659 				    &trace_options_core_fops);
4660 }
4661 
4662 static __init void create_trace_options_dir(void)
4663 {
4664 	struct dentry *t_options;
4665 	int i;
4666 
4667 	t_options = trace_options_init_dentry();
4668 	if (!t_options)
4669 		return;
4670 
4671 	for (i = 0; trace_options[i]; i++)
4672 		create_trace_option_core_file(trace_options[i], i);
4673 }
4674 
4675 static ssize_t
4676 rb_simple_read(struct file *filp, char __user *ubuf,
4677 	       size_t cnt, loff_t *ppos)
4678 {
4679 	struct trace_array *tr = filp->private_data;
4680 	struct ring_buffer *buffer = tr->buffer;
4681 	char buf[64];
4682 	int r;
4683 
4684 	if (buffer)
4685 		r = ring_buffer_record_is_on(buffer);
4686 	else
4687 		r = 0;
4688 
4689 	r = sprintf(buf, "%d\n", r);
4690 
4691 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4692 }
4693 
4694 static ssize_t
4695 rb_simple_write(struct file *filp, const char __user *ubuf,
4696 		size_t cnt, loff_t *ppos)
4697 {
4698 	struct trace_array *tr = filp->private_data;
4699 	struct ring_buffer *buffer = tr->buffer;
4700 	unsigned long val;
4701 	int ret;
4702 
4703 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4704 	if (ret)
4705 		return ret;
4706 
4707 	if (buffer) {
4708 		if (val)
4709 			ring_buffer_record_on(buffer);
4710 		else
4711 			ring_buffer_record_off(buffer);
4712 	}
4713 
4714 	(*ppos)++;
4715 
4716 	return cnt;
4717 }
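
/*
 * Illustrative only: tracing_on (backed by rb_simple_write() above) just
 * flips recording on the ring buffer, so a sketch of bracketing a region of
 * interest from userspace looks like (path assumes the usual debugfs mount;
 * standard includes omitted):
 *
 *	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
 *
 *	write(fd, "1", 1);
 *	run_workload_of_interest();
 *	write(fd, "0", 1);
 *	close(fd);
 *
 * Writing "1" turns recording on and "0" turns it off;
 * run_workload_of_interest() is just a placeholder.
 */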
4718 
4719 static const struct file_operations rb_simple_fops = {
4720 	.open		= tracing_open_generic,
4721 	.read		= rb_simple_read,
4722 	.write		= rb_simple_write,
4723 	.llseek		= default_llseek,
4724 };
4725 
4726 static __init int tracer_init_debugfs(void)
4727 {
4728 	struct dentry *d_tracer;
4729 	int cpu;
4730 
4731 	trace_access_lock_init();
4732 
4733 	d_tracer = tracing_init_dentry();
4734 	if (!d_tracer)
4735 		return 0;
4736 
4737 	trace_create_file("tracing_enabled", 0644, d_tracer,
4738 			&global_trace, &tracing_ctrl_fops);
4739 
4740 	trace_create_file("trace_options", 0644, d_tracer,
4741 			NULL, &tracing_iter_fops);
4742 
4743 	trace_create_file("tracing_cpumask", 0644, d_tracer,
4744 			NULL, &tracing_cpumask_fops);
4745 
4746 	trace_create_file("trace", 0644, d_tracer,
4747 			(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
4748 
4749 	trace_create_file("available_tracers", 0444, d_tracer,
4750 			&global_trace, &show_traces_fops);
4751 
4752 	trace_create_file("current_tracer", 0644, d_tracer,
4753 			&global_trace, &set_tracer_fops);
4754 
4755 #ifdef CONFIG_TRACER_MAX_TRACE
4756 	trace_create_file("tracing_max_latency", 0644, d_tracer,
4757 			&tracing_max_latency, &tracing_max_lat_fops);
4758 #endif
4759 
4760 	trace_create_file("tracing_thresh", 0644, d_tracer,
4761 			&tracing_thresh, &tracing_max_lat_fops);
4762 
4763 	trace_create_file("README", 0444, d_tracer,
4764 			NULL, &tracing_readme_fops);
4765 
4766 	trace_create_file("trace_pipe", 0444, d_tracer,
4767 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4768 
4769 	trace_create_file("buffer_size_kb", 0644, d_tracer,
4770 			&global_trace, &tracing_entries_fops);
4771 
4772 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
4773 			&global_trace, &tracing_total_entries_fops);
4774 
4775 	trace_create_file("free_buffer", 0644, d_tracer,
4776 			&global_trace, &tracing_free_buffer_fops);
4777 
4778 	trace_create_file("trace_marker", 0220, d_tracer,
4779 			NULL, &tracing_mark_fops);
4780 
4781 	trace_create_file("saved_cmdlines", 0444, d_tracer,
4782 			NULL, &tracing_saved_cmdlines_fops);
4783 
4784 	trace_create_file("trace_clock", 0644, d_tracer, NULL,
4785 			  &trace_clock_fops);
4786 
4787 	trace_create_file("tracing_on", 0644, d_tracer,
4788 			    &global_trace, &rb_simple_fops);
4789 
4790 #ifdef CONFIG_DYNAMIC_FTRACE
4791 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4792 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4793 #endif
4794 
4795 	create_trace_options_dir();
4796 
4797 	for_each_tracing_cpu(cpu)
4798 		tracing_init_debugfs_percpu(cpu);
4799 
4800 	return 0;
4801 }
4802 
4803 static int trace_panic_handler(struct notifier_block *this,
4804 			       unsigned long event, void *unused)
4805 {
4806 	if (ftrace_dump_on_oops)
4807 		ftrace_dump(ftrace_dump_on_oops);
4808 	return NOTIFY_OK;
4809 }
4810 
4811 static struct notifier_block trace_panic_notifier = {
4812 	.notifier_call  = trace_panic_handler,
4813 	.next           = NULL,
4814 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
4815 };
4816 
4817 static int trace_die_handler(struct notifier_block *self,
4818 			     unsigned long val,
4819 			     void *data)
4820 {
4821 	switch (val) {
4822 	case DIE_OOPS:
4823 		if (ftrace_dump_on_oops)
4824 			ftrace_dump(ftrace_dump_on_oops);
4825 		break;
4826 	default:
4827 		break;
4828 	}
4829 	return NOTIFY_OK;
4830 }
4831 
4832 static struct notifier_block trace_die_notifier = {
4833 	.notifier_call = trace_die_handler,
4834 	.priority = 200
4835 };
4836 
4837 /*
4838  * printk is limited to a maximum of 1024 characters; we really don't need
4839  * it that big. Nothing should be printing 1000 characters anyway.
4840  */
4841 #define TRACE_MAX_PRINT		1000
4842 
4843 /*
4844  * Define here KERN_TRACE so that we have one place to modify
4845  * it if we decide to change what log level the ftrace dump
4846  * should be at.
4847  */
4848 #define KERN_TRACE		KERN_EMERG
4849 
4850 void
4851 trace_printk_seq(struct trace_seq *s)
4852 {
4853 	/* Probably should print a warning here. */
4854 	if (s->len >= TRACE_MAX_PRINT)
4855 		s->len = TRACE_MAX_PRINT;
4856 
4857 	/* should be zero terminated, but we are paranoid. */
4858 	s->buffer[s->len] = 0;
4859 
4860 	printk(KERN_TRACE "%s", s->buffer);
4861 
4862 	trace_seq_init(s);
4863 }
4864 
4865 void trace_init_global_iter(struct trace_iterator *iter)
4866 {
4867 	iter->tr = &global_trace;
4868 	iter->trace = current_trace;
4869 	iter->cpu_file = TRACE_PIPE_ALL_CPU;
4870 }
4871 
4872 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4873 {
4874 	/* use static because iter can be a bit big for the stack */
4875 	static struct trace_iterator iter;
4876 	static atomic_t dump_running;
4877 	unsigned int old_userobj;
4878 	unsigned long flags;
4879 	int cnt = 0, cpu;
4880 
4881 	/* Only allow one dump user at a time. */
4882 	if (atomic_inc_return(&dump_running) != 1) {
4883 		atomic_dec(&dump_running);
4884 		return;
4885 	}
4886 
4887 	/*
4888 	 * Always turn off tracing when we dump.
4889 	 * We don't need to show trace output of what happens
4890 	 * between multiple crashes.
4891 	 *
4892 	 * If the user does a sysrq-z, then they can re-enable
4893 	 * tracing with echo 1 > tracing_on.
4894 	 */
4895 	tracing_off();
4896 
4897 	local_irq_save(flags);
4898 
4899 	trace_init_global_iter(&iter);
4900 
4901 	for_each_tracing_cpu(cpu) {
4902 		atomic_inc(&iter.tr->data[cpu]->disabled);
4903 	}
4904 
4905 	old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
4906 
4907 	/* don't look at user memory in panic mode */
4908 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4909 
4910 	/* Simulate the iterator */
4911 	iter.tr = &global_trace;
4912 	iter.trace = current_trace;
4913 
4914 	switch (oops_dump_mode) {
4915 	case DUMP_ALL:
4916 		iter.cpu_file = TRACE_PIPE_ALL_CPU;
4917 		break;
4918 	case DUMP_ORIG:
4919 		iter.cpu_file = raw_smp_processor_id();
4920 		break;
4921 	case DUMP_NONE:
4922 		goto out_enable;
4923 	default:
4924 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
4925 		iter.cpu_file = TRACE_PIPE_ALL_CPU;
4926 	}
4927 
4928 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
4929 
4930 	/* Did function tracer already get disabled? */
4931 	if (ftrace_is_dead()) {
4932 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
4933 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
4934 	}
4935 
4936 	/*
4937 	 * We need to stop all tracing on all CPUs to read
4938 	 * the next buffer. This is a bit expensive, but is
4939 	 * not done often. We fill everything we can read,
4940 	 * and then release the locks again.
4941 	 */
4942 
4943 	while (!trace_empty(&iter)) {
4944 
4945 		if (!cnt)
4946 			printk(KERN_TRACE "---------------------------------\n");
4947 
4948 		cnt++;
4949 
4950 		/* reset all but tr, trace, and overruns */
4951 		memset(&iter.seq, 0,
4952 		       sizeof(struct trace_iterator) -
4953 		       offsetof(struct trace_iterator, seq));
4954 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
4955 		iter.pos = -1;
4956 
4957 		if (trace_find_next_entry_inc(&iter) != NULL) {
4958 			int ret;
4959 
4960 			ret = print_trace_line(&iter);
4961 			if (ret != TRACE_TYPE_NO_CONSUME)
4962 				trace_consume(&iter);
4963 		}
4964 		touch_nmi_watchdog();
4965 
4966 		trace_printk_seq(&iter.seq);
4967 	}
4968 
4969 	if (!cnt)
4970 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
4971 	else
4972 		printk(KERN_TRACE "---------------------------------\n");
4973 
4974  out_enable:
4975 	trace_flags |= old_userobj;
4976 
4977 	for_each_tracing_cpu(cpu) {
4978 		atomic_dec(&iter.tr->data[cpu]->disabled);
4979 	}
4980  	atomic_dec(&dump_running);
4981 	local_irq_restore(flags);
4982 }
4983 EXPORT_SYMBOL_GPL(ftrace_dump);
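
/*
 * Illustrative only: since ftrace_dump() is exported, other kernel code can
 * dump the trace buffers when it hits an unexpected state; DUMP_ALL and
 * DUMP_ORIG mirror the oops_dump_mode handling above. A hedged sketch:
 *
 *	if (WARN_ON_ONCE(something_went_wrong))
 *		ftrace_dump(DUMP_ALL);
 *
 * where something_went_wrong is a placeholder condition.
 */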
4984 
4985 __init static int tracer_alloc_buffers(void)
4986 {
4987 	int ring_buf_size;
4988 	enum ring_buffer_flags rb_flags;
4989 	int i;
4990 	int ret = -ENOMEM;
4991 
4992 
4993 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4994 		goto out;
4995 
4996 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4997 		goto out_free_buffer_mask;
4998 
4999 	/* To save memory, keep the ring buffer size to its minimum */
5000 	if (ring_buffer_expanded)
5001 		ring_buf_size = trace_buf_size;
5002 	else
5003 		ring_buf_size = 1;
5004 
5005 	rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5006 
5007 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
5008 	cpumask_copy(tracing_cpumask, cpu_all_mask);
5009 
5010 	/* TODO: make the number of buffers hot-pluggable with CPUs */
5011 	global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
5012 	if (!global_trace.buffer) {
5013 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
5014 		WARN_ON(1);
5015 		goto out_free_cpumask;
5016 	}
5017 	global_trace.entries = ring_buffer_size(global_trace.buffer);
5018 	if (global_trace.buffer_disabled)
5019 		tracing_off();
5020 
5021 
5022 #ifdef CONFIG_TRACER_MAX_TRACE
5023 	max_tr.buffer = ring_buffer_alloc(1, rb_flags);
5024 	if (!max_tr.buffer) {
5025 		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
5026 		WARN_ON(1);
5027 		ring_buffer_free(global_trace.buffer);
5028 		goto out_free_cpumask;
5029 	}
5030 	max_tr.entries = 1;
5031 #endif
5032 
5033 	/* Allocate the first page for all buffers */
5034 	for_each_tracing_cpu(i) {
5035 		global_trace.data[i] = &per_cpu(global_trace_cpu, i);
5036 		max_tr.data[i] = &per_cpu(max_tr_data, i);
5037 	}
5038 
5039 	trace_init_cmdlines();
5040 
5041 	register_tracer(&nop_trace);
5042 	current_trace = &nop_trace;
5043 	/* All seems OK, enable tracing */
5044 	tracing_disabled = 0;
5045 
5046 	atomic_notifier_chain_register(&panic_notifier_list,
5047 				       &trace_panic_notifier);
5048 
5049 	register_die_notifier(&trace_die_notifier);
5050 
5051 	return 0;
5052 
5053 out_free_cpumask:
5054 	free_cpumask_var(tracing_cpumask);
5055 out_free_buffer_mask:
5056 	free_cpumask_var(tracing_buffer_mask);
5057 out:
5058 	return ret;
5059 }
5060 
5061 __init static int clear_boot_tracer(void)
5062 {
5063 	/*
5064 	 * The buffer holding the default bootup tracer name is in an
5065 	 * init section. This function is called at late_initcall time.
5066 	 * If the boot tracer was not found by then, clear the pointer
5067 	 * so that a later tracer registration does not access the
5068 	 * buffer that is about to be freed.
5069 	 */
5070 	if (!default_bootup_tracer)
5071 		return 0;
5072 
5073 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
5074 	       default_bootup_tracer);
5075 	default_bootup_tracer = NULL;
5076 
5077 	return 0;
5078 }
5079 
5080 early_initcall(tracer_alloc_buffers);
5081 fs_initcall(tracer_init_debugfs);
5082 late_initcall(clear_boot_tracer);
5083