// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#include <stdlib.h>
#include <errno.h>
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
#include <unistd.h>

enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

#define MAX_COMM		24

/*
 * Per-cpu statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state */
	int			curr_state;

	/* timerlat IRQ latency */
	unsigned long long	tlat_irq_seqnum;
	unsigned long long	tlat_irq_latency;
	unsigned long long	tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long	tlat_thread_seqnum;
	unsigned long long	tlat_thread_latency;
	unsigned long long	tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 *
	 * Note: these are "unsigned long long" because they are fetched
	 * using tep_get_field_val().
	 */
	unsigned long long	run_thread_pid;
	char			run_thread_comm[MAX_COMM];
	unsigned long long	thread_blocking_duration;
	unsigned long long	max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long	timer_irq_start_time;
	unsigned long long	timer_irq_start_delay;
	unsigned long long	timer_irq_duration;
	unsigned long long	timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * the timerlat IRQ latency. Otherwise, ignore it.
	 */
	unsigned long long	prev_irq_duration;
	unsigned long long	prev_irq_timstamp;

	/*
	 * Interference sum.
	 */
	unsigned long long	thread_nmi_sum;
	unsigned long long	thread_irq_sum;
	unsigned long long	thread_softirq_sum;
	unsigned long long	thread_thread_sum;

	/*
	 * Interference task information.
	 */
	struct trace_seq	*prev_irqs_seq;
	struct trace_seq	*nmi_seq;
	struct trace_seq	*irqs_seq;
	struct trace_seq	*softirqs_seq;
	struct trace_seq	*threads_seq;
	struct trace_seq	*stack_seq;

	/*
	 * Current thread.
	 */
	char			current_comm[MAX_COMM];
	unsigned long long	current_pid;

	/*
	 * Is the system running a kworker?
	 */
	unsigned long long	kworker;
	unsigned long long	kworker_func;
};

/*
 * The analysis context and system-wide view
 */
struct timerlat_aa_context {
	int nr_cpus;
	int dump_tasks;

	/* per CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};

/*
 * The data is stored in a file-local (static) variable, but accessed via a
 * helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now, keep it simple. If needed, store it in the tool, add the *context
 * as a parameter to timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}

/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}

/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * From this point on, start looking for interference: things that
	 * can delay the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Handle the exit-from-idle case.
	 *
	 * If the interrupted thread is not the idle thread:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * If the latency is shorter than the known max exit-from-idle latency:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

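	/* This is a new max exit-from-idle latency for this CPU. */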
	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}

/*
 * timerlat_aa_thread_latency - Handles timerlat thread event
 */
static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
				      struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event)
{
	/*
	 * From this point on, start looking for interference: things that
	 * can delay the IRQ of the next cycle.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
	taa_data->tlat_thread_timstamp = record->ts;

	/* Thread latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);

	return 0;
}

/*
 * timerlat_aa_handler - Handle timerlat events
 *
 * This function is called to handle timerlat events, recording statistics.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
			struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long thread;

	if (!taa_data)
		return -1;

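	/*
	 * The "context" field tells whether the sample comes from the
	 * timerlat IRQ (0) or from the timerlat thread.
	 */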
	tep_get_field_val(s, event, "context", record, &thread, 1);
	if (!thread)
		return timerlat_aa_irq_latency(taa_data, s, record, event);
	else
		return timerlat_aa_thread_latency(taa_data, s, record, event);
}

/*
 * timerlat_aa_nmi_handler - Handles NMI noise
 *
 * It is used to collect information about interferences from NMI. It is
 * hooked to the osnoise:nmi_noise event.
 */
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

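	/*
	 * An NMI that hits while waiting for the timerlat IRQ is accounted
	 * as the previous interference that might have delayed that IRQ.
	 */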
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
			 "nmi", ns_to_usf(duration));
		return 0;
	}

	taa_data->thread_nmi_sum += duration;
	trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
		 "nmi", ns_to_usf(duration));

	return 0;
}

/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other handlers because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The timerlat IRQ handler.
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed by the
	 * timerlat IRQ event handler (timerlat_aa_irq_latency()).
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time when the clock is read and the time at
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set the IRQ start delay to 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * Not an exit from idle: a thread was running when the
		 * timerlat IRQ arrived.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}

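/*
 * Softirq names, indexed by the vector number reported by the
 * osnoise:softirq_noise event.
 */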
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };

/*
 * timerlat_aa_softirq_handler - Handles softirq noise
 *
 * It is used to collect information about interferences from softirqs. It is
 * hooked to the osnoise:softirq_noise event.
 *
 * It is only printed on non-RT kernels, as softirqs run as threads on PREEMPT_RT.
 */
static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
				       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);

	taa_data->thread_softirq_sum += duration;

	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
			 softirq_name[vector], vector, ns_to_usf(duration));
	return 0;
}

/*
 * timerlat_aa_thread_handler - Handles thread noise
 *
 * It is used to collect information about interferences from threads. It is
 * hooked to the osnoise:thread_noise event.
 *
 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
 */
static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;
	unsigned long long pid;
	const char *comm;
	int val;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);

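	/*
	 * The first noise from the thread that was running when the timerlat
	 * IRQ fired is blocking time; noise from any other thread counts as
	 * thread interference.
	 */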
	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
		taa_data->thread_blocking_duration = duration;

		if (comm)
			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
		else
			sprintf(taa_data->run_thread_comm, "<...>");

	} else {
		taa_data->thread_thread_sum += duration;

		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
			 comm, pid, ns_to_usf(duration));
	}

	return 0;
}

/*
 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
 *
 * Saves and parses the stack trace generated by the timerlat IRQ.
 */
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
			      struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long *caller;
	const char *function;
	int val, i;

	trace_seq_reset(taa_data->stack_seq);

	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
	if (caller) {
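		/*
		 * Resolve each saved return address to a function name; stop
		 * at the first address that cannot be resolved.
		 */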
		for (i = 0; ; i++) {
			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
			if (!function)
				break;
			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
		}
	}
	return 0;
}

/*
 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
 *
 * Handles the sched:sched_switch event to trace the current thread running on the
 * CPU. It is used to display the threads running on the other CPUs when the trace
 * stops.
 */
static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
					    struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	const char *comm;
	int val;

	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);

	strncpy(taa_data->current_comm, comm, MAX_COMM);

	/*
	 * The CPU switched to a new thread, so clear the information about
	 * the last kworker that ran.
	 */
	taa_data->kworker = 0;
	taa_data->kworker_func = 0;

	return 0;
}

/*
 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 *
 * Handles the workqueue:workqueue_execute_start event, keeping track of
 * the job that a kworker could be doing on the CPU.
 *
 * This has already caught hardware-related latencies caused by workqueues
 * running driver code that stalls the hardware, for example, with DRM drivers.
 */
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
					     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);

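	/*
	 * Save the work struct and the function being executed so they can
	 * be resolved to names later.
	 */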
	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
	return 0;
}

/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit the stop tracing condition
 *
 * This is the core of the analysis.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency?
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time, using the trace clock as the base.
	 *
	 * TODO: Add a list of the previous IRQs, and then walk the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
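	/*
	 * If the previous IRQ was still running at the expected timer IRQ
	 * start time, it might have delayed the timerlat IRQ.
	 */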
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
				ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
		ns_to_usf(taa_data->timer_irq_start_delay),
		ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
		ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all, folks: the stack trace was printed before the stop,
		 * so it will be displayed; it is the key.
		 */
		printf("  Blocking thread:\n");
		printf("	%24s:%-9llu\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->timer_irq_duration),
			ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the blocking thread postponed the
		 * scheduler.
		 *
		 * Recall that this value is already net of NMI/IRQ/softirq
		 * interference, so there is no need to discount it here.
		 */
		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_blocking_duration),
			ns_to_per(total, taa_data->thread_blocking_duration));

		printf("	%24s:%-9llu		%9.2f us\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid,
			ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_nmi_sum),
			ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_irq_sum),
			ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_softirq_sum),
			ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat thread is not the highest prio one. OK,
	 * the migration thread can happen. But otherwise, you are not measuring
	 * only the "scheduling latency", and this is the difference between
	 * scheduling latency and timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_thread_sum),
			ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
		ns_to_usf(total));
}

static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
{
	struct trace_instance *trace = &taa_ctx->tool->trace;
	int retval;

	retval = tracefs_iterate_raw_events(trace->tep,
					    trace->inst,
					    NULL,
					    0,
					    collect_registered_events,
					    trace);
	if (retval < 0) {
		err_msg("Error iterating on events\n");
		return 0;
	}

	return 1;
}

/**
 * timerlat_auto_analysis - Analyze the collected data
 */
void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	unsigned long long max_exit_from_idle = 0;
	struct timerlat_aa_data *taa_data;
	int max_exit_from_idle_cpu;
	struct tep_handle *tep;
	int cpu;

	timerlat_auto_analysis_collect_trace(taa_ctx);

	/* Bring the stop tracing thresholds to the ns scale. */
	irq_thresh = irq_thresh * 1000;
	thread_thresh = thread_thresh * 1000;

	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);

		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		}

		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
			max_exit_from_idle = taa_data->max_exit_idle_latency;
			max_exit_from_idle_cpu = cpu;
		}
	}

	if (max_exit_from_idle) {
		printf("\n");
		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
	}
	if (!taa_ctx->dump_tasks)
		return;

	printf("\n");
	printf("Printing CPU tasks:\n");
	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
		tep = taa_ctx->tool->trace.tep;

		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);

		if (taa_data->kworker_func)
			printf(" kworker:%s:%s",
				tep_find_function(tep, taa_data->kworker) ? : "<...>",
				tep_find_function(tep, taa_data->kworker_func));
		printf("\n");
	}
}

/*
 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
 */
static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	if (!taa_ctx->taa_data)
		return;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {
		taa_data = timerlat_aa_get_data(taa_ctx, i);

		if (taa_data->prev_irqs_seq) {
			trace_seq_destroy(taa_data->prev_irqs_seq);
			free(taa_data->prev_irqs_seq);
		}

		if (taa_data->nmi_seq) {
			trace_seq_destroy(taa_data->nmi_seq);
			free(taa_data->nmi_seq);
		}

		if (taa_data->irqs_seq) {
			trace_seq_destroy(taa_data->irqs_seq);
			free(taa_data->irqs_seq);
		}

		if (taa_data->softirqs_seq) {
			trace_seq_destroy(taa_data->softirqs_seq);
			free(taa_data->softirqs_seq);
		}

		if (taa_data->threads_seq) {
			trace_seq_destroy(taa_data->threads_seq);
			free(taa_data->threads_seq);
		}

		if (taa_data->stack_seq) {
			trace_seq_destroy(taa_data->stack_seq);
			free(taa_data->stack_seq);
		}
	}
}

/*
 * timerlat_aa_init_seqs - Init seq files used to store parsed information
 *
 * Instead of keeping data structures to store raw data, use seq files to
 * store parsed data.
 *
 * Allocates and initializes the seq files.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {
		taa_data = timerlat_aa_get_data(taa_ctx, i);

		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
		if (!taa_data->prev_irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->prev_irqs_seq);

		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
		if (!taa_data->nmi_seq)
			goto out_err;

		trace_seq_init(taa_data->nmi_seq);

		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
		if (!taa_data->irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->irqs_seq);

		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
		if (!taa_data->softirqs_seq)
			goto out_err;

		trace_seq_init(taa_data->softirqs_seq);

		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
		if (!taa_data->threads_seq)
			goto out_err;

		trace_seq_init(taa_data->threads_seq);

		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
		if (!taa_data->stack_seq)
			goto out_err;

		trace_seq_init(taa_data->stack_seq);
	}

	return 0;

out_err:
	timerlat_aa_destroy_seqs(taa_ctx);
	return -1;
}

/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{
	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}

/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				timerlat_aa_handler, tool);

	/*
	 * register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}

/**
 * timerlat_aa_destroy - Destroy timerlat auto-analysis
 */
void timerlat_aa_destroy(void)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();

	if (!taa_ctx)
		return;

	if (!taa_ctx->taa_data)
		goto out_ctx;

	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
	timerlat_aa_destroy_seqs(taa_ctx);
	free(taa_ctx->taa_data);
out_ctx:
	free(taa_ctx);
}

/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	timerlat_aa_destroy();
	return -1;
}