1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12
13 enum timelat_state {
14 TIMERLAT_INIT = 0,
15 TIMERLAT_WAITING_IRQ,
16 TIMERLAT_WAITING_THREAD,
17 };
18
19 #define MAX_COMM 24
20
21 /*
22 * Per-cpu data statistics and data.
23 */
24 struct timerlat_aa_data {
25 /* Current CPU state */
26 int curr_state;
27
28 /* timerlat IRQ latency */
29 unsigned long long tlat_irq_seqnum;
30 unsigned long long tlat_irq_latency;
31 unsigned long long tlat_irq_timstamp;
32
33 /* timerlat Thread latency */
34 unsigned long long tlat_thread_seqnum;
35 unsigned long long tlat_thread_latency;
36 unsigned long long tlat_thread_timstamp;
37
38 /*
39 * Information about the thread running when the IRQ
40 * arrived.
41 *
42 * This can be blocking or interference, depending on the
43 * priority of the thread. Assuming timerlat is the highest
44 * prio, it is blocking. If timerlat has a lower prio, it is
45 * interference.
46 * note: "unsigned long long" because they are fetch using tep_get_field_val();
47 */
48 unsigned long long run_thread_pid;
49 char run_thread_comm[MAX_COMM];
50 unsigned long long thread_blocking_duration;
51 unsigned long long max_exit_idle_latency;
52
53 /* Information about the timerlat timer irq */
54 unsigned long long timer_irq_start_time;
55 unsigned long long timer_irq_start_delay;
56 unsigned long long timer_irq_duration;
57 unsigned long long timer_exit_from_idle;
58
59 /*
60 * Information about the last IRQ before the timerlat irq
61 * arrived.
62 *
63 * If now - timestamp is <= latency, it might have influenced
64 * in the timerlat irq latency. Otherwise, ignore it.
65 */
66 unsigned long long prev_irq_duration;
67 unsigned long long prev_irq_timstamp;
68
69 /*
70 * Interference sum.
71 */
72 unsigned long long thread_nmi_sum;
73 unsigned long long thread_irq_sum;
74 unsigned long long thread_softirq_sum;
75 unsigned long long thread_thread_sum;
76
77 /*
78 * Interference task information.
79 */
80 struct trace_seq *prev_irqs_seq;
81 struct trace_seq *nmi_seq;
82 struct trace_seq *irqs_seq;
83 struct trace_seq *softirqs_seq;
84 struct trace_seq *threads_seq;
85 struct trace_seq *stack_seq;
86
87 /*
88 * Current thread.
89 */
90 char current_comm[MAX_COMM];
91 unsigned long long current_pid;
92
93 /*
94 * Is the system running a kworker?
95 */
96 unsigned long long kworker;
97 unsigned long long kworker_func;
98 };
99
100 /*
101 * The analysis context and system wide view
102 */
103 struct timerlat_aa_context {
104 int nr_cpus;
105 int dump_tasks;
106
107 /* per CPU data */
108 struct timerlat_aa_data *taa_data;
109
110 /*
111 * required to translate function names and register
112 * events.
113 */
114 struct osnoise_tool *tool;
115 };
116
117 /*
118 * The data is stored as a local variable, but accessed via a helper function.
119 *
120 * It could be stored inside the trace context. But every access would
121 * require container_of() + a series of pointers. Do we need it? Not sure.
122 *
123 * For now keep it simple. If needed, store it in the tool, add the *context
124 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
125 */
126 static struct timerlat_aa_context *__timerlat_aa_ctx;
127
timerlat_aa_get_ctx(void)128 static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
129 {
130 return __timerlat_aa_ctx;
131 }
132
133 /*
134 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135 */
136 static struct timerlat_aa_data
timerlat_aa_get_data(struct timerlat_aa_context * taa_ctx,int cpu)137 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138 {
139 return &taa_ctx->taa_data[cpu];
140 }
141
142 /*
143 * timerlat_aa_irq_latency - Handles timerlat IRQ event
144 */
timerlat_aa_irq_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)145 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
146 struct trace_seq *s, struct tep_record *record,
147 struct tep_event *event)
148 {
149 /*
150 * For interference, we start now looking for things that can delay
151 * the thread.
152 */
153 taa_data->curr_state = TIMERLAT_WAITING_THREAD;
154 taa_data->tlat_irq_timstamp = record->ts;
155
156 /*
157 * Zero values.
158 */
159 taa_data->thread_nmi_sum = 0;
160 taa_data->thread_irq_sum = 0;
161 taa_data->thread_softirq_sum = 0;
162 taa_data->thread_thread_sum = 0;
163 taa_data->thread_blocking_duration = 0;
164 taa_data->timer_irq_start_time = 0;
165 taa_data->timer_irq_duration = 0;
166 taa_data->timer_exit_from_idle = 0;
167
168 /*
169 * Zero interference tasks.
170 */
171 trace_seq_reset(taa_data->nmi_seq);
172 trace_seq_reset(taa_data->irqs_seq);
173 trace_seq_reset(taa_data->softirqs_seq);
174 trace_seq_reset(taa_data->threads_seq);
175
176 /* IRQ latency values */
177 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
178 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
179
180 /* The thread that can cause blocking */
181 tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
182
183 /*
184 * Get exit from idle case.
185 *
186 * If it is not idle thread:
187 */
188 if (taa_data->run_thread_pid)
189 return 0;
190
191 /*
192 * if the latency is shorter than the known exit from idle:
193 */
194 if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
195 return 0;
196
197 /*
198 * To be safe, ignore the cases in which an IRQ/NMI could have
199 * interfered with the timerlat IRQ.
200 */
201 if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
202 < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
203 return 0;
204
205 taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
206
207 return 0;
208 }
209
210 /*
211 * timerlat_aa_thread_latency - Handles timerlat thread event
212 */
timerlat_aa_thread_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)213 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
214 struct trace_seq *s, struct tep_record *record,
215 struct tep_event *event)
216 {
217 /*
218 * For interference, we start now looking for things that can delay
219 * the IRQ of the next cycle.
220 */
221 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
222 taa_data->tlat_thread_timstamp = record->ts;
223
224 /* Thread latency values */
225 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
226 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
227
228 return 0;
229 }
230
231 /*
232 * timerlat_aa_handler - Handle timerlat events
233 *
234 * This function is called to handle timerlat events recording statistics.
235 *
236 * Returns 0 on success, -1 otherwise.
237 */
timerlat_aa_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)238 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
239 struct tep_event *event, void *context)
240 {
241 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
242 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
243 unsigned long long thread;
244
245 if (!taa_data)
246 return -1;
247
248 tep_get_field_val(s, event, "context", record, &thread, 1);
249 if (!thread)
250 return timerlat_aa_irq_latency(taa_data, s, record, event);
251 else
252 return timerlat_aa_thread_latency(taa_data, s, record, event);
253 }
254
255 /*
256 * timerlat_aa_nmi_handler - Handles NMI noise
257 *
258 * It is used to collect information about interferences from NMI. It is
259 * hooked to the osnoise:nmi_noise event.
260 */
timerlat_aa_nmi_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)261 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
262 struct tep_event *event, void *context)
263 {
264 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
265 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
266 unsigned long long duration;
267 unsigned long long start;
268
269 tep_get_field_val(s, event, "duration", record, &duration, 1);
270 tep_get_field_val(s, event, "start", record, &start, 1);
271
272 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
273 taa_data->prev_irq_duration = duration;
274 taa_data->prev_irq_timstamp = start;
275
276 trace_seq_reset(taa_data->prev_irqs_seq);
277 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
278 "nmi", ns_to_usf(duration));
279 return 0;
280 }
281
282 taa_data->thread_nmi_sum += duration;
283 trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
284 "nmi", ns_to_usf(duration));
285
286 return 0;
287 }
288
289 /*
290 * timerlat_aa_irq_handler - Handles IRQ noise
291 *
292 * It is used to collect information about interferences from IRQ. It is
293 * hooked to the osnoise:irq_noise event.
294 *
295 * It is a little bit more complex than the other because it measures:
296 * - The IRQs that can delay the timer IRQ before it happened.
297 * - The Timerlat IRQ handler
298 * - The IRQs that happened between the timerlat IRQ and the timerlat thread
299 * (IRQ interference).
300 */
timerlat_aa_irq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)301 static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
302 struct tep_event *event, void *context)
303 {
304 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
305 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
306 unsigned long long expected_start;
307 unsigned long long duration;
308 unsigned long long vector;
309 unsigned long long start;
310 char *desc;
311 int val;
312
313 tep_get_field_val(s, event, "duration", record, &duration, 1);
314 tep_get_field_val(s, event, "start", record, &start, 1);
315 tep_get_field_val(s, event, "vector", record, &vector, 1);
316 desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
317
318 /*
319 * Before the timerlat IRQ.
320 */
321 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
322 taa_data->prev_irq_duration = duration;
323 taa_data->prev_irq_timstamp = start;
324
325 trace_seq_reset(taa_data->prev_irqs_seq);
326 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
327 desc, vector, ns_to_usf(duration));
328 return 0;
329 }
330
331 /*
332 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
333 * the timerlat irq handler.
334 */
335 if (!taa_data->timer_irq_start_time) {
336 expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
337
338 taa_data->timer_irq_start_time = start;
339 taa_data->timer_irq_duration = duration;
340
341 /*
342 * We are dealing with two different clock sources: the
343 * external clock source that timerlat uses as a reference
344 * and the clock used by the tracer. There are also two
345 * moments: the time reading the clock and the timer in
346 * which the event is placed in the buffer (the trace
347 * event timestamp). If the processor is slow or there
348 * is some hardware noise, the difference between the
349 * timestamp and the external clock read can be longer
350 * than the IRQ handler delay, resulting in a negative
351 * time. If so, set IRQ start delay as 0. In the end,
352 * it is less relevant than the noise.
353 */
354 if (expected_start < taa_data->timer_irq_start_time)
355 taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
356 else
357 taa_data->timer_irq_start_delay = 0;
358
359 /*
360 * not exit from idle.
361 */
362 if (taa_data->run_thread_pid)
363 return 0;
364
365 if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
366 taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
367
368 return 0;
369 }
370
371 /*
372 * IRQ interference.
373 */
374 taa_data->thread_irq_sum += duration;
375 trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n",
376 desc, vector, ns_to_usf(duration));
377
378 return 0;
379 }
380
381 static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
382 "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
383
384
385 /*
386 * timerlat_aa_softirq_handler - Handles Softirq noise
387 *
388 * It is used to collect information about interferences from Softirq. It is
389 * hooked to the osnoise:softirq_noise event.
390 *
391 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
392 */
timerlat_aa_softirq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)393 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
394 struct tep_event *event, void *context)
395 {
396 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
397 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
398 unsigned long long duration;
399 unsigned long long vector;
400 unsigned long long start;
401
402 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
403 return 0;
404
405 tep_get_field_val(s, event, "duration", record, &duration, 1);
406 tep_get_field_val(s, event, "start", record, &start, 1);
407 tep_get_field_val(s, event, "vector", record, &vector, 1);
408
409 taa_data->thread_softirq_sum += duration;
410
411 trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
412 softirq_name[vector], vector, ns_to_usf(duration));
413 return 0;
414 }
415
416 /*
417 * timerlat_aa_softirq_handler - Handles thread noise
418 *
419 * It is used to collect information about interferences from threads. It is
420 * hooked to the osnoise:thread_noise event.
421 *
422 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
423 */
timerlat_aa_thread_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)424 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
425 struct tep_event *event, void *context)
426 {
427 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
428 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
429 unsigned long long duration;
430 unsigned long long start;
431 unsigned long long pid;
432 const char *comm;
433 int val;
434
435 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
436 return 0;
437
438 tep_get_field_val(s, event, "duration", record, &duration, 1);
439 tep_get_field_val(s, event, "start", record, &start, 1);
440
441 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
442 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
443
444 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
445 taa_data->thread_blocking_duration = duration;
446
447 if (comm)
448 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
449 else
450 sprintf(taa_data->run_thread_comm, "<...>");
451
452 } else {
453 taa_data->thread_thread_sum += duration;
454
455 trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
456 comm, pid, ns_to_usf(duration));
457 }
458
459 return 0;
460 }
461
462 /*
463 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
464 *
465 * Saves and parse the stack trace generated by the timerlat IRQ.
466 */
timerlat_aa_stack_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)467 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
468 struct tep_event *event, void *context)
469 {
470 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
471 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
472 unsigned long *caller;
473 const char *function;
474 int val, i;
475
476 trace_seq_reset(taa_data->stack_seq);
477
478 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
479 caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
480 if (caller) {
481 for (i = 0; ; i++) {
482 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
483 if (!function)
484 break;
485 trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
486 }
487 }
488 return 0;
489 }
490
491 /*
492 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
493 *
494 * Handles the sched:sched_switch event to trace the current thread running on the
495 * CPU. It is used to display the threads running on the other CPUs when the trace
496 * stops.
497 */
timerlat_aa_sched_switch_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)498 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
499 struct tep_event *event, void *context)
500 {
501 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
502 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
503 const char *comm;
504 int val;
505
506 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
507 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
508
509 strncpy(taa_data->current_comm, comm, MAX_COMM);
510
511 /*
512 * If this was a kworker, clean the last kworkers that ran.
513 */
514 taa_data->kworker = 0;
515 taa_data->kworker_func = 0;
516
517 return 0;
518 }
519
520 /*
521 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
522 *
523 * Handles workqueue:workqueue_execute_start event, keeping track of
524 * the job that a kworker could be doing in the CPU.
525 *
526 * We already catch problems of hardware related latencies caused by work queues
527 * running driver code that causes hardware stall. For example, with DRM drivers.
528 */
timerlat_aa_kworker_start_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)529 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
530 struct tep_event *event, void *context)
531 {
532 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
533 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
534
535 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
536 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
537 return 0;
538 }
539
540 /*
541 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
542 *
543 * This is the core of the analysis.
544 */
timerlat_thread_analysis(struct timerlat_aa_data * taa_data,int cpu,int irq_thresh,int thread_thresh)545 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
546 int irq_thresh, int thread_thresh)
547 {
548 long long exp_irq_ts;
549 int total;
550 int irq;
551
552 /*
553 * IRQ latency or Thread latency?
554 */
555 if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
556 irq = 1;
557 total = taa_data->tlat_irq_latency;
558 } else {
559 irq = 0;
560 total = taa_data->tlat_thread_latency;
561 }
562
563 /*
564 * Expected IRQ arrival time using the trace clock as the base.
565 *
566 * TODO: Add a list of previous IRQ, and then run the list backwards.
567 */
568 exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
569 if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
570 if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
571 printf(" Previous IRQ interference: \t\t up to %9.2f us\n",
572 ns_to_usf(taa_data->prev_irq_duration));
573 }
574
575 /*
576 * The delay that the IRQ suffered before starting.
577 */
578 printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n",
579 (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
580 ns_to_usf(taa_data->timer_irq_start_delay),
581 ns_to_per(total, taa_data->timer_irq_start_delay));
582
583 /*
584 * Timerlat IRQ.
585 */
586 printf(" IRQ latency: \t\t\t\t %9.2f us\n",
587 ns_to_usf(taa_data->tlat_irq_latency));
588
589 if (irq) {
590 /*
591 * If the trace stopped due to IRQ, the other events will not happen
592 * because... the trace stopped :-).
593 *
594 * That is all folks, the stack trace was printed before the stop,
595 * so it will be displayed, it is the key.
596 */
597 printf(" Blocking thread:\n");
598 printf(" %24s:%-9llu\n",
599 taa_data->run_thread_comm, taa_data->run_thread_pid);
600 } else {
601 /*
602 * The duration of the IRQ handler that handled the timerlat IRQ.
603 */
604 printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n",
605 ns_to_usf(taa_data->timer_irq_duration),
606 ns_to_per(total, taa_data->timer_irq_duration));
607
608 /*
609 * The amount of time that the current thread postponed the scheduler.
610 *
611 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
612 * is no need to compute values here.
613 */
614 printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n",
615 ns_to_usf(taa_data->thread_blocking_duration),
616 ns_to_per(total, taa_data->thread_blocking_duration));
617
618 printf(" %24s:%-9llu %9.2f us\n",
619 taa_data->run_thread_comm, taa_data->run_thread_pid,
620 ns_to_usf(taa_data->thread_blocking_duration));
621 }
622
623 /*
624 * Print the stack trace!
625 */
626 trace_seq_do_printf(taa_data->stack_seq);
627
628 /*
629 * NMIs can happen during the IRQ, so they are always possible.
630 */
631 if (taa_data->thread_nmi_sum)
632 printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n",
633 ns_to_usf(taa_data->thread_nmi_sum),
634 ns_to_per(total, taa_data->thread_nmi_sum));
635
636 /*
637 * If it is an IRQ latency, the other factors can be skipped.
638 */
639 if (irq)
640 goto print_total;
641
642 /*
643 * Prints the interference caused by IRQs to the thread latency.
644 */
645 if (taa_data->thread_irq_sum) {
646 printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n",
647 ns_to_usf(taa_data->thread_irq_sum),
648 ns_to_per(total, taa_data->thread_irq_sum));
649
650 trace_seq_do_printf(taa_data->irqs_seq);
651 }
652
653 /*
654 * Prints the interference caused by Softirqs to the thread latency.
655 */
656 if (taa_data->thread_softirq_sum) {
657 printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n",
658 ns_to_usf(taa_data->thread_softirq_sum),
659 ns_to_per(total, taa_data->thread_softirq_sum));
660
661 trace_seq_do_printf(taa_data->softirqs_seq);
662 }
663
664 /*
665 * Prints the interference caused by other threads to the thread latency.
666 *
667 * If this happens, your timerlat is not the highest prio. OK, migration
668 * thread can happen. But otherwise, you are not measuring the "scheduling
669 * latency" only, and here is the difference from scheduling latency and
670 * timer handling latency.
671 */
672 if (taa_data->thread_thread_sum) {
673 printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n",
674 ns_to_usf(taa_data->thread_thread_sum),
675 ns_to_per(total, taa_data->thread_thread_sum));
676
677 trace_seq_do_printf(taa_data->threads_seq);
678 }
679
680 /*
681 * Done.
682 */
683 print_total:
684 printf("------------------------------------------------------------------------\n");
685 printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
686 ns_to_usf(total));
687 }
688
timerlat_auto_analysis_collect_trace(struct timerlat_aa_context * taa_ctx)689 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
690 {
691 struct trace_instance *trace = &taa_ctx->tool->trace;
692 int retval;
693
694 retval = tracefs_iterate_raw_events(trace->tep,
695 trace->inst,
696 NULL,
697 0,
698 collect_registered_events,
699 trace);
700 if (retval < 0) {
701 err_msg("Error iterating on events\n");
702 return 0;
703 }
704
705 return 1;
706 }
707
708 /**
709 * timerlat_auto_analysis - Analyze the collected data
710 */
timerlat_auto_analysis(int irq_thresh,int thread_thresh)711 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
712 {
713 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
714 unsigned long long max_exit_from_idle = 0;
715 struct timerlat_aa_data *taa_data;
716 int max_exit_from_idle_cpu;
717 struct tep_handle *tep;
718 int cpu;
719
720 timerlat_auto_analysis_collect_trace(taa_ctx);
721
722 /* bring stop tracing to the ns scale */
723 irq_thresh = irq_thresh * 1000;
724 thread_thresh = thread_thresh * 1000;
725
726 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
727 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
728
729 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
730 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
731 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
732 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
733 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
734 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
735 }
736
737 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
738 max_exit_from_idle = taa_data->max_exit_idle_latency;
739 max_exit_from_idle_cpu = cpu;
740 }
741
742 }
743
744 if (max_exit_from_idle) {
745 printf("\n");
746 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
747 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
748 }
749 if (!taa_ctx->dump_tasks)
750 return;
751
752 printf("\n");
753 printf("Printing CPU tasks:\n");
754 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
755 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
756 tep = taa_ctx->tool->trace.tep;
757
758 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
759
760 if (taa_data->kworker_func)
761 printf(" kworker:%s:%s",
762 tep_find_function(tep, taa_data->kworker) ? : "<...>",
763 tep_find_function(tep, taa_data->kworker_func));
764 printf("\n");
765 }
766
767 }
768
769 /*
770 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
771 */
timerlat_aa_destroy_seqs(struct timerlat_aa_context * taa_ctx)772 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
773 {
774 struct timerlat_aa_data *taa_data;
775 int i;
776
777 if (!taa_ctx->taa_data)
778 return;
779
780 for (i = 0; i < taa_ctx->nr_cpus; i++) {
781 taa_data = timerlat_aa_get_data(taa_ctx, i);
782
783 if (taa_data->prev_irqs_seq) {
784 trace_seq_destroy(taa_data->prev_irqs_seq);
785 free(taa_data->prev_irqs_seq);
786 }
787
788 if (taa_data->nmi_seq) {
789 trace_seq_destroy(taa_data->nmi_seq);
790 free(taa_data->nmi_seq);
791 }
792
793 if (taa_data->irqs_seq) {
794 trace_seq_destroy(taa_data->irqs_seq);
795 free(taa_data->irqs_seq);
796 }
797
798 if (taa_data->softirqs_seq) {
799 trace_seq_destroy(taa_data->softirqs_seq);
800 free(taa_data->softirqs_seq);
801 }
802
803 if (taa_data->threads_seq) {
804 trace_seq_destroy(taa_data->threads_seq);
805 free(taa_data->threads_seq);
806 }
807
808 if (taa_data->stack_seq) {
809 trace_seq_destroy(taa_data->stack_seq);
810 free(taa_data->stack_seq);
811 }
812 }
813 }
814
815 /*
816 * timerlat_aa_init_seqs - Init seq files used to store parsed information
817 *
818 * Instead of keeping data structures to store raw data, use seq files to
819 * store parsed data.
820 *
821 * Allocates and initialize seq files.
822 *
823 * Returns 0 on success, -1 otherwise.
824 */
timerlat_aa_init_seqs(struct timerlat_aa_context * taa_ctx)825 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
826 {
827 struct timerlat_aa_data *taa_data;
828 int i;
829
830 for (i = 0; i < taa_ctx->nr_cpus; i++) {
831
832 taa_data = timerlat_aa_get_data(taa_ctx, i);
833
834 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
835 if (!taa_data->prev_irqs_seq)
836 goto out_err;
837
838 trace_seq_init(taa_data->prev_irqs_seq);
839
840 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
841 if (!taa_data->nmi_seq)
842 goto out_err;
843
844 trace_seq_init(taa_data->nmi_seq);
845
846 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
847 if (!taa_data->irqs_seq)
848 goto out_err;
849
850 trace_seq_init(taa_data->irqs_seq);
851
852 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
853 if (!taa_data->softirqs_seq)
854 goto out_err;
855
856 trace_seq_init(taa_data->softirqs_seq);
857
858 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
859 if (!taa_data->threads_seq)
860 goto out_err;
861
862 trace_seq_init(taa_data->threads_seq);
863
864 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
865 if (!taa_data->stack_seq)
866 goto out_err;
867
868 trace_seq_init(taa_data->stack_seq);
869 }
870
871 return 0;
872
873 out_err:
874 timerlat_aa_destroy_seqs(taa_ctx);
875 return -1;
876 }
877
878 /*
879 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
880 */
timerlat_aa_unregister_events(struct osnoise_tool * tool,int dump_tasks)881 static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
882 {
883
884 tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
885 timerlat_aa_handler, tool);
886
887 tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
888
889 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
890 timerlat_aa_nmi_handler, tool);
891
892 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
893 timerlat_aa_irq_handler, tool);
894
895 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
896 timerlat_aa_softirq_handler, tool);
897
898 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
899 timerlat_aa_thread_handler, tool);
900
901 tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
902 timerlat_aa_stack_handler, tool);
903 if (!dump_tasks)
904 return;
905
906 tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
907 tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
908 timerlat_aa_sched_switch_handler, tool);
909
910 tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
911 tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
912 timerlat_aa_kworker_start_handler, tool);
913 }
914
915 /*
916 * timerlat_aa_register_events - Register events used in the auto-analysis
917 *
918 * Returns 0 on success, -1 otherwise.
919 */
timerlat_aa_register_events(struct osnoise_tool * tool,int dump_tasks)920 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
921 {
922 int retval;
923
924 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
925 timerlat_aa_handler, tool);
926
927
928 /*
929 * register auto-analysis handlers.
930 */
931 retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
932 if (retval < 0 && !errno) {
933 err_msg("Could not find osnoise events\n");
934 goto out_err;
935 }
936
937 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
938 timerlat_aa_nmi_handler, tool);
939
940 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
941 timerlat_aa_irq_handler, tool);
942
943 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
944 timerlat_aa_softirq_handler, tool);
945
946 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
947 timerlat_aa_thread_handler, tool);
948
949 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
950 timerlat_aa_stack_handler, tool);
951
952 if (!dump_tasks)
953 return 0;
954
955 /*
956 * Dump task events.
957 */
958 retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
959 if (retval < 0 && !errno) {
960 err_msg("Could not find sched_switch\n");
961 goto out_err;
962 }
963
964 tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
965 timerlat_aa_sched_switch_handler, tool);
966
967 retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
968 if (retval < 0 && !errno) {
969 err_msg("Could not find workqueue_execute_start\n");
970 goto out_err;
971 }
972
973 tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
974 timerlat_aa_kworker_start_handler, tool);
975
976 return 0;
977
978 out_err:
979 timerlat_aa_unregister_events(tool, dump_tasks);
980 return -1;
981 }
982
983 /**
984 * timerlat_aa_destroy - Destroy timerlat auto-analysis
985 */
timerlat_aa_destroy(void)986 void timerlat_aa_destroy(void)
987 {
988 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
989
990 if (!taa_ctx)
991 return;
992
993 if (!taa_ctx->taa_data)
994 goto out_ctx;
995
996 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
997 timerlat_aa_destroy_seqs(taa_ctx);
998 free(taa_ctx->taa_data);
999 out_ctx:
1000 free(taa_ctx);
1001 }
1002
1003 /**
1004 * timerlat_aa_init - Initialize timerlat auto-analysis
1005 *
1006 * Returns 0 on success, -1 otherwise.
1007 */
timerlat_aa_init(struct osnoise_tool * tool,int dump_tasks)1008 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1009 {
1010 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1011 struct timerlat_aa_context *taa_ctx;
1012 int retval;
1013
1014 taa_ctx = calloc(1, sizeof(*taa_ctx));
1015 if (!taa_ctx)
1016 return -1;
1017
1018 __timerlat_aa_ctx = taa_ctx;
1019
1020 taa_ctx->nr_cpus = nr_cpus;
1021 taa_ctx->tool = tool;
1022 taa_ctx->dump_tasks = dump_tasks;
1023
1024 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1025 if (!taa_ctx->taa_data)
1026 goto out_err;
1027
1028 retval = timerlat_aa_init_seqs(taa_ctx);
1029 if (retval)
1030 goto out_err;
1031
1032 retval = timerlat_aa_register_events(tool, dump_tasks);
1033 if (retval)
1034 goto out_err;
1035
1036 return 0;
1037
1038 out_err:
1039 timerlat_aa_destroy();
1040 return -1;
1041 }
1042