1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6 
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/callchain.h"
14 #include "util/lock-contention.h"
15 #include "util/bpf_skel/lock_data.h"
16 
17 #include <subcmd/pager.h>
18 #include <subcmd/parse-options.h>
19 #include "util/trace-event.h"
20 #include "util/tracepoint.h"
21 
22 #include "util/debug.h"
23 #include "util/session.h"
24 #include "util/tool.h"
25 #include "util/data.h"
26 #include "util/string2.h"
27 #include "util/map.h"
28 #include "util/util.h"
29 
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/prctl.h>
33 #include <semaphore.h>
34 #include <math.h>
35 #include <limits.h>
36 #include <ctype.h>
37 
38 #include <linux/list.h>
39 #include <linux/hash.h>
40 #include <linux/kernel.h>
41 #include <linux/zalloc.h>
42 #include <linux/err.h>
43 #include <linux/stringify.h>
44 
45 static struct perf_session *session;
46 static struct target target;
47 
48 /* based on kernel/lockdep.c */
49 #define LOCKHASH_BITS		12
50 #define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
51 
52 static struct hlist_head *lockhash_table;
53 
54 #define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
55 #define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
56 
57 static struct rb_root		thread_stats;
58 
59 static bool combine_locks;
60 static bool show_thread_stats;
61 static bool show_lock_addrs;
62 static bool show_lock_owner;
63 static bool use_bpf;
64 static unsigned long bpf_map_entries = MAX_ENTRIES;
65 static int max_stack_depth = CONTENTION_STACK_DEPTH;
66 static int stack_skip = CONTENTION_STACK_SKIP;
67 static int print_nr_entries = INT_MAX / 2;
68 static LIST_HEAD(callstack_filters);
69 static const char *output_name = NULL;
70 static FILE *lock_output;
71 
72 struct callstack_filter {
73 	struct list_head list;
74 	char name[];
75 };
76 
77 static struct lock_filter filters;
78 
79 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
80 
needs_callstack(void)81 static bool needs_callstack(void)
82 {
83 	return !list_empty(&callstack_filters);
84 }
85 
thread_stat_find(u32 tid)86 static struct thread_stat *thread_stat_find(u32 tid)
87 {
88 	struct rb_node *node;
89 	struct thread_stat *st;
90 
91 	node = thread_stats.rb_node;
92 	while (node) {
93 		st = container_of(node, struct thread_stat, rb);
94 		if (st->tid == tid)
95 			return st;
96 		else if (tid < st->tid)
97 			node = node->rb_left;
98 		else
99 			node = node->rb_right;
100 	}
101 
102 	return NULL;
103 }
104 
thread_stat_insert(struct thread_stat * new)105 static void thread_stat_insert(struct thread_stat *new)
106 {
107 	struct rb_node **rb = &thread_stats.rb_node;
108 	struct rb_node *parent = NULL;
109 	struct thread_stat *p;
110 
111 	while (*rb) {
112 		p = container_of(*rb, struct thread_stat, rb);
113 		parent = *rb;
114 
115 		if (new->tid < p->tid)
116 			rb = &(*rb)->rb_left;
117 		else if (new->tid > p->tid)
118 			rb = &(*rb)->rb_right;
119 		else
120 			BUG_ON("inserting invalid thread_stat\n");
121 	}
122 
123 	rb_link_node(&new->rb, parent, rb);
124 	rb_insert_color(&new->rb, &thread_stats);
125 }
126 
thread_stat_findnew_after_first(u32 tid)127 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
128 {
129 	struct thread_stat *st;
130 
131 	st = thread_stat_find(tid);
132 	if (st)
133 		return st;
134 
135 	st = zalloc(sizeof(struct thread_stat));
136 	if (!st) {
137 		pr_err("memory allocation failed\n");
138 		return NULL;
139 	}
140 
141 	st->tid = tid;
142 	INIT_LIST_HEAD(&st->seq_list);
143 
144 	thread_stat_insert(st);
145 
146 	return st;
147 }
148 
149 static struct thread_stat *thread_stat_findnew_first(u32 tid);
150 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
151 	thread_stat_findnew_first;
152 
thread_stat_findnew_first(u32 tid)153 static struct thread_stat *thread_stat_findnew_first(u32 tid)
154 {
155 	struct thread_stat *st;
156 
157 	st = zalloc(sizeof(struct thread_stat));
158 	if (!st) {
159 		pr_err("memory allocation failed\n");
160 		return NULL;
161 	}
162 	st->tid = tid;
163 	INIT_LIST_HEAD(&st->seq_list);
164 
165 	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
166 	rb_insert_color(&st->rb, &thread_stats);
167 
168 	thread_stat_findnew = thread_stat_findnew_after_first;
169 	return st;
170 }
171 
172 /* build simple key function one is bigger than two */
173 #define SINGLE_KEY(member)						\
174 	static int lock_stat_key_ ## member(struct lock_stat *one,	\
175 					 struct lock_stat *two)		\
176 	{								\
177 		return one->member > two->member;			\
178 	}
179 
180 SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)181 SINGLE_KEY(nr_contended)
182 SINGLE_KEY(avg_wait_time)
183 SINGLE_KEY(wait_time_total)
184 SINGLE_KEY(wait_time_max)
185 
186 static int lock_stat_key_wait_time_min(struct lock_stat *one,
187 					struct lock_stat *two)
188 {
189 	u64 s1 = one->wait_time_min;
190 	u64 s2 = two->wait_time_min;
191 	if (s1 == ULLONG_MAX)
192 		s1 = 0;
193 	if (s2 == ULLONG_MAX)
194 		s2 = 0;
195 	return s1 > s2;
196 }
197 
198 struct lock_key {
199 	/*
200 	 * name: the value for specify by user
201 	 * this should be simpler than raw name of member
202 	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
203 	 */
204 	const char		*name;
205 	/* header: the string printed on the header line */
206 	const char		*header;
207 	/* len: the printing width of the field */
208 	int			len;
209 	/* key: a pointer to function to compare two lock stats for sorting */
210 	int			(*key)(struct lock_stat*, struct lock_stat*);
211 	/* print: a pointer to function to print a given lock stats */
212 	void			(*print)(struct lock_key*, struct lock_stat*);
213 	/* list: list entry to link this */
214 	struct list_head	list;
215 };
216 
lock_stat_key_print_time(unsigned long long nsec,int len)217 static void lock_stat_key_print_time(unsigned long long nsec, int len)
218 {
219 	static const struct {
220 		float base;
221 		const char *unit;
222 	} table[] = {
223 		{ 1e9 * 3600, "h " },
224 		{ 1e9 * 60, "m " },
225 		{ 1e9, "s " },
226 		{ 1e6, "ms" },
227 		{ 1e3, "us" },
228 		{ 0, NULL },
229 	};
230 
231 	/* for CSV output */
232 	if (len == 0) {
233 		fprintf(lock_output, "%llu", nsec);
234 		return;
235 	}
236 
237 	for (int i = 0; table[i].unit; i++) {
238 		if (nsec < table[i].base)
239 			continue;
240 
241 		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
242 		return;
243 	}
244 
245 	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
246 }
247 
248 #define PRINT_KEY(member)						\
249 static void lock_stat_key_print_ ## member(struct lock_key *key,	\
250 					   struct lock_stat *ls)	\
251 {									\
252 	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
253 }
254 
255 #define PRINT_TIME(member)						\
256 static void lock_stat_key_print_ ## member(struct lock_key *key,	\
257 					   struct lock_stat *ls)	\
258 {									\
259 	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
260 }
261 
262 PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)263 PRINT_KEY(nr_contended)
264 PRINT_TIME(avg_wait_time)
265 PRINT_TIME(wait_time_total)
266 PRINT_TIME(wait_time_max)
267 
268 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
269 					      struct lock_stat *ls)
270 {
271 	u64 wait_time = ls->wait_time_min;
272 
273 	if (wait_time == ULLONG_MAX)
274 		wait_time = 0;
275 
276 	lock_stat_key_print_time(wait_time, key->len);
277 }
278 
279 
280 static const char		*sort_key = "acquired";
281 
282 static int			(*compare)(struct lock_stat *, struct lock_stat *);
283 
284 static struct rb_root		sorted; /* place to store intermediate data */
285 static struct rb_root		result;	/* place to store sorted data */
286 
287 static LIST_HEAD(lock_keys);
288 static const char		*output_fields;
289 
290 #define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
291 	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
292 static struct lock_key report_keys[] = {
293 	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
294 	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
295 	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
296 	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
297 	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
298 	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
299 
300 	/* extra comparisons much complicated should be here */
301 	{ }
302 };
303 
304 static struct lock_key contention_keys[] = {
305 	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
306 	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
307 	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
308 	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
309 	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
310 
311 	/* extra comparisons much complicated should be here */
312 	{ }
313 };
314 
select_key(bool contention)315 static int select_key(bool contention)
316 {
317 	int i;
318 	struct lock_key *keys = report_keys;
319 
320 	if (contention)
321 		keys = contention_keys;
322 
323 	for (i = 0; keys[i].name; i++) {
324 		if (!strcmp(keys[i].name, sort_key)) {
325 			compare = keys[i].key;
326 
327 			/* selected key should be in the output fields */
328 			if (list_empty(&keys[i].list))
329 				list_add_tail(&keys[i].list, &lock_keys);
330 
331 			return 0;
332 		}
333 	}
334 
335 	pr_err("Unknown compare key: %s\n", sort_key);
336 	return -1;
337 }
338 
add_output_field(bool contention,char * name)339 static int add_output_field(bool contention, char *name)
340 {
341 	int i;
342 	struct lock_key *keys = report_keys;
343 
344 	if (contention)
345 		keys = contention_keys;
346 
347 	for (i = 0; keys[i].name; i++) {
348 		if (strcmp(keys[i].name, name))
349 			continue;
350 
351 		/* prevent double link */
352 		if (list_empty(&keys[i].list))
353 			list_add_tail(&keys[i].list, &lock_keys);
354 
355 		return 0;
356 	}
357 
358 	pr_err("Unknown output field: %s\n", name);
359 	return -1;
360 }
361 
setup_output_field(bool contention,const char * str)362 static int setup_output_field(bool contention, const char *str)
363 {
364 	char *tok, *tmp, *orig;
365 	int i, ret = 0;
366 	struct lock_key *keys = report_keys;
367 
368 	if (contention)
369 		keys = contention_keys;
370 
371 	/* no output field given: use all of them */
372 	if (str == NULL) {
373 		for (i = 0; keys[i].name; i++)
374 			list_add_tail(&keys[i].list, &lock_keys);
375 		return 0;
376 	}
377 
378 	for (i = 0; keys[i].name; i++)
379 		INIT_LIST_HEAD(&keys[i].list);
380 
381 	orig = tmp = strdup(str);
382 	if (orig == NULL)
383 		return -ENOMEM;
384 
385 	while ((tok = strsep(&tmp, ",")) != NULL){
386 		ret = add_output_field(contention, tok);
387 		if (ret < 0)
388 			break;
389 	}
390 	free(orig);
391 
392 	return ret;
393 }
394 
combine_lock_stats(struct lock_stat * st)395 static void combine_lock_stats(struct lock_stat *st)
396 {
397 	struct rb_node **rb = &sorted.rb_node;
398 	struct rb_node *parent = NULL;
399 	struct lock_stat *p;
400 	int ret;
401 
402 	while (*rb) {
403 		p = container_of(*rb, struct lock_stat, rb);
404 		parent = *rb;
405 
406 		if (st->name && p->name)
407 			ret = strcmp(st->name, p->name);
408 		else
409 			ret = !!st->name - !!p->name;
410 
411 		if (ret == 0) {
412 			p->nr_acquired += st->nr_acquired;
413 			p->nr_contended += st->nr_contended;
414 			p->wait_time_total += st->wait_time_total;
415 
416 			if (p->nr_contended)
417 				p->avg_wait_time = p->wait_time_total / p->nr_contended;
418 
419 			if (p->wait_time_min > st->wait_time_min)
420 				p->wait_time_min = st->wait_time_min;
421 			if (p->wait_time_max < st->wait_time_max)
422 				p->wait_time_max = st->wait_time_max;
423 
424 			p->broken |= st->broken;
425 			st->combined = 1;
426 			return;
427 		}
428 
429 		if (ret < 0)
430 			rb = &(*rb)->rb_left;
431 		else
432 			rb = &(*rb)->rb_right;
433 	}
434 
435 	rb_link_node(&st->rb, parent, rb);
436 	rb_insert_color(&st->rb, &sorted);
437 }
438 
insert_to_result(struct lock_stat * st,int (* bigger)(struct lock_stat *,struct lock_stat *))439 static void insert_to_result(struct lock_stat *st,
440 			     int (*bigger)(struct lock_stat *, struct lock_stat *))
441 {
442 	struct rb_node **rb = &result.rb_node;
443 	struct rb_node *parent = NULL;
444 	struct lock_stat *p;
445 
446 	if (combine_locks && st->combined)
447 		return;
448 
449 	while (*rb) {
450 		p = container_of(*rb, struct lock_stat, rb);
451 		parent = *rb;
452 
453 		if (bigger(st, p))
454 			rb = &(*rb)->rb_left;
455 		else
456 			rb = &(*rb)->rb_right;
457 	}
458 
459 	rb_link_node(&st->rb, parent, rb);
460 	rb_insert_color(&st->rb, &result);
461 }
462 
463 /* returns left most element of result, and erase it */
pop_from_result(void)464 static struct lock_stat *pop_from_result(void)
465 {
466 	struct rb_node *node = result.rb_node;
467 
468 	if (!node)
469 		return NULL;
470 
471 	while (node->rb_left)
472 		node = node->rb_left;
473 
474 	rb_erase(node, &result);
475 	return container_of(node, struct lock_stat, rb);
476 }
477 
lock_stat_find(u64 addr)478 struct lock_stat *lock_stat_find(u64 addr)
479 {
480 	struct hlist_head *entry = lockhashentry(addr);
481 	struct lock_stat *ret;
482 
483 	hlist_for_each_entry(ret, entry, hash_entry) {
484 		if (ret->addr == addr)
485 			return ret;
486 	}
487 	return NULL;
488 }
489 
lock_stat_findnew(u64 addr,const char * name,int flags)490 struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
491 {
492 	struct hlist_head *entry = lockhashentry(addr);
493 	struct lock_stat *ret, *new;
494 
495 	hlist_for_each_entry(ret, entry, hash_entry) {
496 		if (ret->addr == addr)
497 			return ret;
498 	}
499 
500 	new = zalloc(sizeof(struct lock_stat));
501 	if (!new)
502 		goto alloc_failed;
503 
504 	new->addr = addr;
505 	new->name = strdup(name);
506 	if (!new->name) {
507 		free(new);
508 		goto alloc_failed;
509 	}
510 
511 	new->flags = flags;
512 	new->wait_time_min = ULLONG_MAX;
513 
514 	hlist_add_head(&new->hash_entry, entry);
515 	return new;
516 
517 alloc_failed:
518 	pr_err("memory allocation failed\n");
519 	return NULL;
520 }
521 
match_callstack_filter(struct machine * machine,u64 * callstack)522 bool match_callstack_filter(struct machine *machine, u64 *callstack)
523 {
524 	struct map *kmap;
525 	struct symbol *sym;
526 	u64 ip;
527 	const char *arch = perf_env__arch(machine->env);
528 
529 	if (list_empty(&callstack_filters))
530 		return true;
531 
532 	for (int i = 0; i < max_stack_depth; i++) {
533 		struct callstack_filter *filter;
534 
535 		/*
536 		 * In powerpc, the callchain saved by kernel always includes
537 		 * first three entries as the NIP (next instruction pointer),
538 		 * LR (link register), and the contents of LR save area in the
539 		 * second stack frame. In certain scenarios its possible to have
540 		 * invalid kernel instruction addresses in either LR or the second
541 		 * stack frame's LR. In that case, kernel will store that address as
542 		 * zero.
543 		 *
544 		 * The below check will continue to look into callstack,
545 		 * incase first or second callstack index entry has 0
546 		 * address for powerpc.
547 		 */
548 		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
549 						(i != 1 && i != 2))))
550 			break;
551 
552 		ip = callstack[i];
553 		sym = machine__find_kernel_symbol(machine, ip, &kmap);
554 		if (sym == NULL)
555 			continue;
556 
557 		list_for_each_entry(filter, &callstack_filters, list) {
558 			if (strstr(sym->name, filter->name))
559 				return true;
560 		}
561 	}
562 	return false;
563 }
564 
565 struct trace_lock_handler {
566 	/* it's used on CONFIG_LOCKDEP */
567 	int (*acquire_event)(struct evsel *evsel,
568 			     struct perf_sample *sample);
569 
570 	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
571 	int (*acquired_event)(struct evsel *evsel,
572 			      struct perf_sample *sample);
573 
574 	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
575 	int (*contended_event)(struct evsel *evsel,
576 			       struct perf_sample *sample);
577 
578 	/* it's used on CONFIG_LOCKDEP */
579 	int (*release_event)(struct evsel *evsel,
580 			     struct perf_sample *sample);
581 
582 	/* it's used when CONFIG_LOCKDEP is off */
583 	int (*contention_begin_event)(struct evsel *evsel,
584 				      struct perf_sample *sample);
585 
586 	/* it's used when CONFIG_LOCKDEP is off */
587 	int (*contention_end_event)(struct evsel *evsel,
588 				    struct perf_sample *sample);
589 };
590 
get_seq(struct thread_stat * ts,u64 addr)591 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
592 {
593 	struct lock_seq_stat *seq;
594 
595 	list_for_each_entry(seq, &ts->seq_list, list) {
596 		if (seq->addr == addr)
597 			return seq;
598 	}
599 
600 	seq = zalloc(sizeof(struct lock_seq_stat));
601 	if (!seq) {
602 		pr_err("memory allocation failed\n");
603 		return NULL;
604 	}
605 	seq->state = SEQ_STATE_UNINITIALIZED;
606 	seq->addr = addr;
607 
608 	list_add(&seq->list, &ts->seq_list);
609 	return seq;
610 }
611 
612 enum broken_state {
613 	BROKEN_ACQUIRE,
614 	BROKEN_ACQUIRED,
615 	BROKEN_CONTENDED,
616 	BROKEN_RELEASE,
617 	BROKEN_MAX,
618 };
619 
620 static int bad_hist[BROKEN_MAX];
621 
622 enum acquire_flags {
623 	TRY_LOCK = 1,
624 	READ_LOCK = 2,
625 };
626 
get_key_by_aggr_mode_simple(u64 * key,u64 addr,u32 tid)627 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
628 {
629 	switch (aggr_mode) {
630 	case LOCK_AGGR_ADDR:
631 		*key = addr;
632 		break;
633 	case LOCK_AGGR_TASK:
634 		*key = tid;
635 		break;
636 	case LOCK_AGGR_CALLER:
637 	default:
638 		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
639 		return -EINVAL;
640 	}
641 	return 0;
642 }
643 
644 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
645 
get_key_by_aggr_mode(u64 * key,u64 addr,struct evsel * evsel,struct perf_sample * sample)646 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
647 				 struct perf_sample *sample)
648 {
649 	if (aggr_mode == LOCK_AGGR_CALLER) {
650 		*key = callchain_id(evsel, sample);
651 		return 0;
652 	}
653 	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
654 }
655 
report_lock_acquire_event(struct evsel * evsel,struct perf_sample * sample)656 static int report_lock_acquire_event(struct evsel *evsel,
657 				     struct perf_sample *sample)
658 {
659 	struct lock_stat *ls;
660 	struct thread_stat *ts;
661 	struct lock_seq_stat *seq;
662 	const char *name = evsel__strval(evsel, sample, "name");
663 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
664 	int flag = evsel__intval(evsel, sample, "flags");
665 	u64 key;
666 	int ret;
667 
668 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
669 	if (ret < 0)
670 		return ret;
671 
672 	ls = lock_stat_findnew(key, name, 0);
673 	if (!ls)
674 		return -ENOMEM;
675 
676 	ts = thread_stat_findnew(sample->tid);
677 	if (!ts)
678 		return -ENOMEM;
679 
680 	seq = get_seq(ts, addr);
681 	if (!seq)
682 		return -ENOMEM;
683 
684 	switch (seq->state) {
685 	case SEQ_STATE_UNINITIALIZED:
686 	case SEQ_STATE_RELEASED:
687 		if (!flag) {
688 			seq->state = SEQ_STATE_ACQUIRING;
689 		} else {
690 			if (flag & TRY_LOCK)
691 				ls->nr_trylock++;
692 			if (flag & READ_LOCK)
693 				ls->nr_readlock++;
694 			seq->state = SEQ_STATE_READ_ACQUIRED;
695 			seq->read_count = 1;
696 			ls->nr_acquired++;
697 		}
698 		break;
699 	case SEQ_STATE_READ_ACQUIRED:
700 		if (flag & READ_LOCK) {
701 			seq->read_count++;
702 			ls->nr_acquired++;
703 			goto end;
704 		} else {
705 			goto broken;
706 		}
707 		break;
708 	case SEQ_STATE_ACQUIRED:
709 	case SEQ_STATE_ACQUIRING:
710 	case SEQ_STATE_CONTENDED:
711 broken:
712 		/* broken lock sequence */
713 		if (!ls->broken) {
714 			ls->broken = 1;
715 			bad_hist[BROKEN_ACQUIRE]++;
716 		}
717 		list_del_init(&seq->list);
718 		free(seq);
719 		goto end;
720 	default:
721 		BUG_ON("Unknown state of lock sequence found!\n");
722 		break;
723 	}
724 
725 	ls->nr_acquire++;
726 	seq->prev_event_time = sample->time;
727 end:
728 	return 0;
729 }
730 
report_lock_acquired_event(struct evsel * evsel,struct perf_sample * sample)731 static int report_lock_acquired_event(struct evsel *evsel,
732 				      struct perf_sample *sample)
733 {
734 	struct lock_stat *ls;
735 	struct thread_stat *ts;
736 	struct lock_seq_stat *seq;
737 	u64 contended_term;
738 	const char *name = evsel__strval(evsel, sample, "name");
739 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
740 	u64 key;
741 	int ret;
742 
743 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
744 	if (ret < 0)
745 		return ret;
746 
747 	ls = lock_stat_findnew(key, name, 0);
748 	if (!ls)
749 		return -ENOMEM;
750 
751 	ts = thread_stat_findnew(sample->tid);
752 	if (!ts)
753 		return -ENOMEM;
754 
755 	seq = get_seq(ts, addr);
756 	if (!seq)
757 		return -ENOMEM;
758 
759 	switch (seq->state) {
760 	case SEQ_STATE_UNINITIALIZED:
761 		/* orphan event, do nothing */
762 		return 0;
763 	case SEQ_STATE_ACQUIRING:
764 		break;
765 	case SEQ_STATE_CONTENDED:
766 		contended_term = sample->time - seq->prev_event_time;
767 		ls->wait_time_total += contended_term;
768 		if (contended_term < ls->wait_time_min)
769 			ls->wait_time_min = contended_term;
770 		if (ls->wait_time_max < contended_term)
771 			ls->wait_time_max = contended_term;
772 		break;
773 	case SEQ_STATE_RELEASED:
774 	case SEQ_STATE_ACQUIRED:
775 	case SEQ_STATE_READ_ACQUIRED:
776 		/* broken lock sequence */
777 		if (!ls->broken) {
778 			ls->broken = 1;
779 			bad_hist[BROKEN_ACQUIRED]++;
780 		}
781 		list_del_init(&seq->list);
782 		free(seq);
783 		goto end;
784 	default:
785 		BUG_ON("Unknown state of lock sequence found!\n");
786 		break;
787 	}
788 
789 	seq->state = SEQ_STATE_ACQUIRED;
790 	ls->nr_acquired++;
791 	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
792 	seq->prev_event_time = sample->time;
793 end:
794 	return 0;
795 }
796 
report_lock_contended_event(struct evsel * evsel,struct perf_sample * sample)797 static int report_lock_contended_event(struct evsel *evsel,
798 				       struct perf_sample *sample)
799 {
800 	struct lock_stat *ls;
801 	struct thread_stat *ts;
802 	struct lock_seq_stat *seq;
803 	const char *name = evsel__strval(evsel, sample, "name");
804 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
805 	u64 key;
806 	int ret;
807 
808 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
809 	if (ret < 0)
810 		return ret;
811 
812 	ls = lock_stat_findnew(key, name, 0);
813 	if (!ls)
814 		return -ENOMEM;
815 
816 	ts = thread_stat_findnew(sample->tid);
817 	if (!ts)
818 		return -ENOMEM;
819 
820 	seq = get_seq(ts, addr);
821 	if (!seq)
822 		return -ENOMEM;
823 
824 	switch (seq->state) {
825 	case SEQ_STATE_UNINITIALIZED:
826 		/* orphan event, do nothing */
827 		return 0;
828 	case SEQ_STATE_ACQUIRING:
829 		break;
830 	case SEQ_STATE_RELEASED:
831 	case SEQ_STATE_ACQUIRED:
832 	case SEQ_STATE_READ_ACQUIRED:
833 	case SEQ_STATE_CONTENDED:
834 		/* broken lock sequence */
835 		if (!ls->broken) {
836 			ls->broken = 1;
837 			bad_hist[BROKEN_CONTENDED]++;
838 		}
839 		list_del_init(&seq->list);
840 		free(seq);
841 		goto end;
842 	default:
843 		BUG_ON("Unknown state of lock sequence found!\n");
844 		break;
845 	}
846 
847 	seq->state = SEQ_STATE_CONTENDED;
848 	ls->nr_contended++;
849 	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
850 	seq->prev_event_time = sample->time;
851 end:
852 	return 0;
853 }
854 
report_lock_release_event(struct evsel * evsel,struct perf_sample * sample)855 static int report_lock_release_event(struct evsel *evsel,
856 				     struct perf_sample *sample)
857 {
858 	struct lock_stat *ls;
859 	struct thread_stat *ts;
860 	struct lock_seq_stat *seq;
861 	const char *name = evsel__strval(evsel, sample, "name");
862 	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
863 	u64 key;
864 	int ret;
865 
866 	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
867 	if (ret < 0)
868 		return ret;
869 
870 	ls = lock_stat_findnew(key, name, 0);
871 	if (!ls)
872 		return -ENOMEM;
873 
874 	ts = thread_stat_findnew(sample->tid);
875 	if (!ts)
876 		return -ENOMEM;
877 
878 	seq = get_seq(ts, addr);
879 	if (!seq)
880 		return -ENOMEM;
881 
882 	switch (seq->state) {
883 	case SEQ_STATE_UNINITIALIZED:
884 		goto end;
885 	case SEQ_STATE_ACQUIRED:
886 		break;
887 	case SEQ_STATE_READ_ACQUIRED:
888 		seq->read_count--;
889 		BUG_ON(seq->read_count < 0);
890 		if (seq->read_count) {
891 			ls->nr_release++;
892 			goto end;
893 		}
894 		break;
895 	case SEQ_STATE_ACQUIRING:
896 	case SEQ_STATE_CONTENDED:
897 	case SEQ_STATE_RELEASED:
898 		/* broken lock sequence */
899 		if (!ls->broken) {
900 			ls->broken = 1;
901 			bad_hist[BROKEN_RELEASE]++;
902 		}
903 		goto free_seq;
904 	default:
905 		BUG_ON("Unknown state of lock sequence found!\n");
906 		break;
907 	}
908 
909 	ls->nr_release++;
910 free_seq:
911 	list_del_init(&seq->list);
912 	free(seq);
913 end:
914 	return 0;
915 }
916 
get_symbol_name_offset(struct map * map,struct symbol * sym,u64 ip,char * buf,int size)917 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
918 				  char *buf, int size)
919 {
920 	u64 offset;
921 
922 	if (map == NULL || sym == NULL) {
923 		buf[0] = '\0';
924 		return 0;
925 	}
926 
927 	offset = map__map_ip(map, ip) - sym->start;
928 
929 	if (offset)
930 		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
931 	else
932 		return strlcpy(buf, sym->name, size);
933 }
lock_contention_caller(struct evsel * evsel,struct perf_sample * sample,char * buf,int size)934 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
935 				  char *buf, int size)
936 {
937 	struct thread *thread;
938 	struct callchain_cursor *cursor;
939 	struct machine *machine = &session->machines.host;
940 	struct symbol *sym;
941 	int skip = 0;
942 	int ret;
943 
944 	/* lock names will be replaced to task name later */
945 	if (show_thread_stats)
946 		return -1;
947 
948 	thread = machine__findnew_thread(machine, -1, sample->pid);
949 	if (thread == NULL)
950 		return -1;
951 
952 	cursor = get_tls_callchain_cursor();
953 
954 	/* use caller function name from the callchain */
955 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
956 					NULL, NULL, max_stack_depth);
957 	if (ret != 0) {
958 		thread__put(thread);
959 		return -1;
960 	}
961 
962 	callchain_cursor_commit(cursor);
963 	thread__put(thread);
964 
965 	while (true) {
966 		struct callchain_cursor_node *node;
967 
968 		node = callchain_cursor_current(cursor);
969 		if (node == NULL)
970 			break;
971 
972 		/* skip first few entries - for lock functions */
973 		if (++skip <= stack_skip)
974 			goto next;
975 
976 		sym = node->ms.sym;
977 		if (sym && !machine__is_lock_function(machine, node->ip)) {
978 			get_symbol_name_offset(node->ms.map, sym, node->ip,
979 					       buf, size);
980 			return 0;
981 		}
982 
983 next:
984 		callchain_cursor_advance(cursor);
985 	}
986 	return -1;
987 }
988 
callchain_id(struct evsel * evsel,struct perf_sample * sample)989 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
990 {
991 	struct callchain_cursor *cursor;
992 	struct machine *machine = &session->machines.host;
993 	struct thread *thread;
994 	u64 hash = 0;
995 	int skip = 0;
996 	int ret;
997 
998 	thread = machine__findnew_thread(machine, -1, sample->pid);
999 	if (thread == NULL)
1000 		return -1;
1001 
1002 	cursor = get_tls_callchain_cursor();
1003 	/* use caller function name from the callchain */
1004 	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
1005 					NULL, NULL, max_stack_depth);
1006 	thread__put(thread);
1007 
1008 	if (ret != 0)
1009 		return -1;
1010 
1011 	callchain_cursor_commit(cursor);
1012 
1013 	while (true) {
1014 		struct callchain_cursor_node *node;
1015 
1016 		node = callchain_cursor_current(cursor);
1017 		if (node == NULL)
1018 			break;
1019 
1020 		/* skip first few entries - for lock functions */
1021 		if (++skip <= stack_skip)
1022 			goto next;
1023 
1024 		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
1025 			goto next;
1026 
1027 		hash ^= hash_long((unsigned long)node->ip, 64);
1028 
1029 next:
1030 		callchain_cursor_advance(cursor);
1031 	}
1032 	return hash;
1033 }
1034 
get_callstack(struct perf_sample * sample,int max_stack)1035 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
1036 {
1037 	u64 *callstack;
1038 	u64 i;
1039 	int c;
1040 
1041 	callstack = calloc(max_stack, sizeof(*callstack));
1042 	if (callstack == NULL)
1043 		return NULL;
1044 
1045 	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
1046 		u64 ip = sample->callchain->ips[i];
1047 
1048 		if (ip >= PERF_CONTEXT_MAX)
1049 			continue;
1050 
1051 		callstack[c++] = ip;
1052 	}
1053 	return callstack;
1054 }
1055 
report_lock_contention_begin_event(struct evsel * evsel,struct perf_sample * sample)1056 static int report_lock_contention_begin_event(struct evsel *evsel,
1057 					      struct perf_sample *sample)
1058 {
1059 	struct lock_stat *ls;
1060 	struct thread_stat *ts;
1061 	struct lock_seq_stat *seq;
1062 	u64 addr = evsel__intval(evsel, sample, "lock_addr");
1063 	unsigned int flags = evsel__intval(evsel, sample, "flags");
1064 	u64 key;
1065 	int i, ret;
1066 	static bool kmap_loaded;
1067 	struct machine *machine = &session->machines.host;
1068 	struct map *kmap;
1069 	struct symbol *sym;
1070 
1071 	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1072 	if (ret < 0)
1073 		return ret;
1074 
1075 	if (!kmap_loaded) {
1076 		unsigned long *addrs;
1077 
1078 		/* make sure it loads the kernel map to find lock symbols */
1079 		map__load(machine__kernel_map(machine));
1080 		kmap_loaded = true;
1081 
1082 		/* convert (kernel) symbols to addresses */
1083 		for (i = 0; i < filters.nr_syms; i++) {
1084 			sym = machine__find_kernel_symbol_by_name(machine,
1085 								  filters.syms[i],
1086 								  &kmap);
1087 			if (sym == NULL) {
1088 				pr_warning("ignore unknown symbol: %s\n",
1089 					   filters.syms[i]);
1090 				continue;
1091 			}
1092 
1093 			addrs = realloc(filters.addrs,
1094 					(filters.nr_addrs + 1) * sizeof(*addrs));
1095 			if (addrs == NULL) {
1096 				pr_warning("memory allocation failure\n");
1097 				return -ENOMEM;
1098 			}
1099 
1100 			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
1101 			filters.addrs = addrs;
1102 		}
1103 	}
1104 
1105 	ls = lock_stat_find(key);
1106 	if (!ls) {
1107 		char buf[128];
1108 		const char *name = "";
1109 
1110 		switch (aggr_mode) {
1111 		case LOCK_AGGR_ADDR:
1112 			sym = machine__find_kernel_symbol(machine, key, &kmap);
1113 			if (sym)
1114 				name = sym->name;
1115 			break;
1116 		case LOCK_AGGR_CALLER:
1117 			name = buf;
1118 			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
1119 				name = "Unknown";
1120 			break;
1121 		case LOCK_AGGR_TASK:
1122 		default:
1123 			break;
1124 		}
1125 
1126 		ls = lock_stat_findnew(key, name, flags);
1127 		if (!ls)
1128 			return -ENOMEM;
1129 	}
1130 
1131 	if (filters.nr_types) {
1132 		bool found = false;
1133 
1134 		for (i = 0; i < filters.nr_types; i++) {
1135 			if (flags == filters.types[i]) {
1136 				found = true;
1137 				break;
1138 			}
1139 		}
1140 
1141 		if (!found)
1142 			return 0;
1143 	}
1144 
1145 	if (filters.nr_addrs) {
1146 		bool found = false;
1147 
1148 		for (i = 0; i < filters.nr_addrs; i++) {
1149 			if (addr == filters.addrs[i]) {
1150 				found = true;
1151 				break;
1152 			}
1153 		}
1154 
1155 		if (!found)
1156 			return 0;
1157 	}
1158 
1159 	if (needs_callstack()) {
1160 		u64 *callstack = get_callstack(sample, max_stack_depth);
1161 		if (callstack == NULL)
1162 			return -ENOMEM;
1163 
1164 		if (!match_callstack_filter(machine, callstack)) {
1165 			free(callstack);
1166 			return 0;
1167 		}
1168 
1169 		if (ls->callstack == NULL)
1170 			ls->callstack = callstack;
1171 		else
1172 			free(callstack);
1173 	}
1174 
1175 	ts = thread_stat_findnew(sample->tid);
1176 	if (!ts)
1177 		return -ENOMEM;
1178 
1179 	seq = get_seq(ts, addr);
1180 	if (!seq)
1181 		return -ENOMEM;
1182 
1183 	switch (seq->state) {
1184 	case SEQ_STATE_UNINITIALIZED:
1185 	case SEQ_STATE_ACQUIRED:
1186 		break;
1187 	case SEQ_STATE_CONTENDED:
1188 		/*
1189 		 * It can have nested contention begin with mutex spinning,
1190 		 * then we would use the original contention begin event and
1191 		 * ignore the second one.
1192 		 */
1193 		goto end;
1194 	case SEQ_STATE_ACQUIRING:
1195 	case SEQ_STATE_READ_ACQUIRED:
1196 	case SEQ_STATE_RELEASED:
1197 		/* broken lock sequence */
1198 		if (!ls->broken) {
1199 			ls->broken = 1;
1200 			bad_hist[BROKEN_CONTENDED]++;
1201 		}
1202 		list_del_init(&seq->list);
1203 		free(seq);
1204 		goto end;
1205 	default:
1206 		BUG_ON("Unknown state of lock sequence found!\n");
1207 		break;
1208 	}
1209 
1210 	if (seq->state != SEQ_STATE_CONTENDED) {
1211 		seq->state = SEQ_STATE_CONTENDED;
1212 		seq->prev_event_time = sample->time;
1213 		ls->nr_contended++;
1214 	}
1215 end:
1216 	return 0;
1217 }
1218 
report_lock_contention_end_event(struct evsel * evsel,struct perf_sample * sample)1219 static int report_lock_contention_end_event(struct evsel *evsel,
1220 					    struct perf_sample *sample)
1221 {
1222 	struct lock_stat *ls;
1223 	struct thread_stat *ts;
1224 	struct lock_seq_stat *seq;
1225 	u64 contended_term;
1226 	u64 addr = evsel__intval(evsel, sample, "lock_addr");
1227 	u64 key;
1228 	int ret;
1229 
1230 	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1231 	if (ret < 0)
1232 		return ret;
1233 
1234 	ls = lock_stat_find(key);
1235 	if (!ls)
1236 		return 0;
1237 
1238 	ts = thread_stat_find(sample->tid);
1239 	if (!ts)
1240 		return 0;
1241 
1242 	seq = get_seq(ts, addr);
1243 	if (!seq)
1244 		return -ENOMEM;
1245 
1246 	switch (seq->state) {
1247 	case SEQ_STATE_UNINITIALIZED:
1248 		goto end;
1249 	case SEQ_STATE_CONTENDED:
1250 		contended_term = sample->time - seq->prev_event_time;
1251 		ls->wait_time_total += contended_term;
1252 		if (contended_term < ls->wait_time_min)
1253 			ls->wait_time_min = contended_term;
1254 		if (ls->wait_time_max < contended_term)
1255 			ls->wait_time_max = contended_term;
1256 		break;
1257 	case SEQ_STATE_ACQUIRING:
1258 	case SEQ_STATE_ACQUIRED:
1259 	case SEQ_STATE_READ_ACQUIRED:
1260 	case SEQ_STATE_RELEASED:
1261 		/* broken lock sequence */
1262 		if (!ls->broken) {
1263 			ls->broken = 1;
1264 			bad_hist[BROKEN_ACQUIRED]++;
1265 		}
1266 		list_del_init(&seq->list);
1267 		free(seq);
1268 		goto end;
1269 	default:
1270 		BUG_ON("Unknown state of lock sequence found!\n");
1271 		break;
1272 	}
1273 
1274 	seq->state = SEQ_STATE_ACQUIRED;
1275 	ls->nr_acquired++;
1276 	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1277 end:
1278 	return 0;
1279 }
1280 
1281 /* lock oriented handlers */
1282 /* TODO: handlers for CPU oriented, thread oriented */
1283 static struct trace_lock_handler report_lock_ops  = {
1284 	.acquire_event		= report_lock_acquire_event,
1285 	.acquired_event		= report_lock_acquired_event,
1286 	.contended_event	= report_lock_contended_event,
1287 	.release_event		= report_lock_release_event,
1288 	.contention_begin_event	= report_lock_contention_begin_event,
1289 	.contention_end_event	= report_lock_contention_end_event,
1290 };
1291 
1292 static struct trace_lock_handler contention_lock_ops  = {
1293 	.contention_begin_event	= report_lock_contention_begin_event,
1294 	.contention_end_event	= report_lock_contention_end_event,
1295 };
1296 
1297 
1298 static struct trace_lock_handler *trace_handler;
1299 
evsel__process_lock_acquire(struct evsel * evsel,struct perf_sample * sample)1300 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1301 {
1302 	if (trace_handler->acquire_event)
1303 		return trace_handler->acquire_event(evsel, sample);
1304 	return 0;
1305 }
1306 
evsel__process_lock_acquired(struct evsel * evsel,struct perf_sample * sample)1307 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1308 {
1309 	if (trace_handler->acquired_event)
1310 		return trace_handler->acquired_event(evsel, sample);
1311 	return 0;
1312 }
1313 
evsel__process_lock_contended(struct evsel * evsel,struct perf_sample * sample)1314 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1315 {
1316 	if (trace_handler->contended_event)
1317 		return trace_handler->contended_event(evsel, sample);
1318 	return 0;
1319 }
1320 
evsel__process_lock_release(struct evsel * evsel,struct perf_sample * sample)1321 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1322 {
1323 	if (trace_handler->release_event)
1324 		return trace_handler->release_event(evsel, sample);
1325 	return 0;
1326 }
1327 
evsel__process_contention_begin(struct evsel * evsel,struct perf_sample * sample)1328 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1329 {
1330 	if (trace_handler->contention_begin_event)
1331 		return trace_handler->contention_begin_event(evsel, sample);
1332 	return 0;
1333 }
1334 
evsel__process_contention_end(struct evsel * evsel,struct perf_sample * sample)1335 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1336 {
1337 	if (trace_handler->contention_end_event)
1338 		return trace_handler->contention_end_event(evsel, sample);
1339 	return 0;
1340 }
1341 
print_bad_events(int bad,int total)1342 static void print_bad_events(int bad, int total)
1343 {
1344 	/* Output for debug, this have to be removed */
1345 	int i;
1346 	int broken = 0;
1347 	const char *name[4] =
1348 		{ "acquire", "acquired", "contended", "release" };
1349 
1350 	for (i = 0; i < BROKEN_MAX; i++)
1351 		broken += bad_hist[i];
1352 
1353 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1354 		return;
1355 
1356 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1357 	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1358 	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1359 	fprintf(lock_output, "histogram of events caused bad sequence\n");
1360 	for (i = 0; i < BROKEN_MAX; i++)
1361 		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1362 }
1363 
1364 /* TODO: various way to print, coloring, nano or milli sec */
print_result(void)1365 static void print_result(void)
1366 {
1367 	struct lock_stat *st;
1368 	struct lock_key *key;
1369 	char cut_name[20];
1370 	int bad, total, printed;
1371 
1372 	if (!quiet) {
1373 		fprintf(lock_output, "%20s ", "Name");
1374 		list_for_each_entry(key, &lock_keys, list)
1375 			fprintf(lock_output, "%*s ", key->len, key->header);
1376 		fprintf(lock_output, "\n\n");
1377 	}
1378 
1379 	bad = total = printed = 0;
1380 	while ((st = pop_from_result())) {
1381 		total++;
1382 		if (st->broken)
1383 			bad++;
1384 		if (!st->nr_acquired)
1385 			continue;
1386 
1387 		bzero(cut_name, 20);
1388 
1389 		if (strlen(st->name) < 20) {
1390 			/* output raw name */
1391 			const char *name = st->name;
1392 
1393 			if (show_thread_stats) {
1394 				struct thread *t;
1395 
1396 				/* st->addr contains tid of thread */
1397 				t = perf_session__findnew(session, st->addr);
1398 				name = thread__comm_str(t);
1399 			}
1400 
1401 			fprintf(lock_output, "%20s ", name);
1402 		} else {
1403 			strncpy(cut_name, st->name, 16);
1404 			cut_name[16] = '.';
1405 			cut_name[17] = '.';
1406 			cut_name[18] = '.';
1407 			cut_name[19] = '\0';
1408 			/* cut off name for saving output style */
1409 			fprintf(lock_output, "%20s ", cut_name);
1410 		}
1411 
1412 		list_for_each_entry(key, &lock_keys, list) {
1413 			key->print(key, st);
1414 			fprintf(lock_output, " ");
1415 		}
1416 		fprintf(lock_output, "\n");
1417 
1418 		if (++printed >= print_nr_entries)
1419 			break;
1420 	}
1421 
1422 	print_bad_events(bad, total);
1423 }
1424 
1425 static bool info_threads, info_map;
1426 
dump_threads(void)1427 static void dump_threads(void)
1428 {
1429 	struct thread_stat *st;
1430 	struct rb_node *node;
1431 	struct thread *t;
1432 
1433 	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1434 
1435 	node = rb_first(&thread_stats);
1436 	while (node) {
1437 		st = container_of(node, struct thread_stat, rb);
1438 		t = perf_session__findnew(session, st->tid);
1439 		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1440 		node = rb_next(node);
1441 		thread__put(t);
1442 	}
1443 }
1444 
compare_maps(struct lock_stat * a,struct lock_stat * b)1445 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1446 {
1447 	int ret;
1448 
1449 	if (a->name && b->name)
1450 		ret = strcmp(a->name, b->name);
1451 	else
1452 		ret = !!a->name - !!b->name;
1453 
1454 	if (!ret)
1455 		return a->addr < b->addr;
1456 	else
1457 		return ret < 0;
1458 }
1459 
dump_map(void)1460 static void dump_map(void)
1461 {
1462 	unsigned int i;
1463 	struct lock_stat *st;
1464 
1465 	fprintf(lock_output, "Address of instance: name of class\n");
1466 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1467 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1468 			insert_to_result(st, compare_maps);
1469 		}
1470 	}
1471 
1472 	while ((st = pop_from_result()))
1473 		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1474 }
1475 
dump_info(void)1476 static int dump_info(void)
1477 {
1478 	int rc = 0;
1479 
1480 	if (info_threads)
1481 		dump_threads();
1482 	else if (info_map)
1483 		dump_map();
1484 	else {
1485 		rc = -1;
1486 		pr_err("Unknown type of information\n");
1487 	}
1488 
1489 	return rc;
1490 }
1491 
1492 static const struct evsel_str_handler lock_tracepoints[] = {
1493 	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
1494 	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1495 	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1496 	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
1497 };
1498 
1499 static const struct evsel_str_handler contention_tracepoints[] = {
1500 	{ "lock:contention_begin", evsel__process_contention_begin, },
1501 	{ "lock:contention_end",   evsel__process_contention_end,   },
1502 };
1503 
process_event_update(struct perf_tool * tool,union perf_event * event,struct evlist ** pevlist)1504 static int process_event_update(struct perf_tool *tool,
1505 				union perf_event *event,
1506 				struct evlist **pevlist)
1507 {
1508 	int ret;
1509 
1510 	ret = perf_event__process_event_update(tool, event, pevlist);
1511 	if (ret < 0)
1512 		return ret;
1513 
1514 	/* this can return -EEXIST since we call it for each evsel */
1515 	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1516 	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1517 	return 0;
1518 }
1519 
1520 typedef int (*tracepoint_handler)(struct evsel *evsel,
1521 				  struct perf_sample *sample);
1522 
process_sample_event(struct perf_tool * tool __maybe_unused,union perf_event * event,struct perf_sample * sample,struct evsel * evsel,struct machine * machine)1523 static int process_sample_event(struct perf_tool *tool __maybe_unused,
1524 				union perf_event *event,
1525 				struct perf_sample *sample,
1526 				struct evsel *evsel,
1527 				struct machine *machine)
1528 {
1529 	int err = 0;
1530 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
1531 							sample->tid);
1532 
1533 	if (thread == NULL) {
1534 		pr_debug("problem processing %d event, skipping it.\n",
1535 			event->header.type);
1536 		return -1;
1537 	}
1538 
1539 	if (evsel->handler != NULL) {
1540 		tracepoint_handler f = evsel->handler;
1541 		err = f(evsel, sample);
1542 	}
1543 
1544 	thread__put(thread);
1545 
1546 	return err;
1547 }
1548 
combine_result(void)1549 static void combine_result(void)
1550 {
1551 	unsigned int i;
1552 	struct lock_stat *st;
1553 
1554 	if (!combine_locks)
1555 		return;
1556 
1557 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1558 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1559 			combine_lock_stats(st);
1560 		}
1561 	}
1562 }
1563 
sort_result(void)1564 static void sort_result(void)
1565 {
1566 	unsigned int i;
1567 	struct lock_stat *st;
1568 
1569 	for (i = 0; i < LOCKHASH_SIZE; i++) {
1570 		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1571 			insert_to_result(st, compare);
1572 		}
1573 	}
1574 }
1575 
1576 static const struct {
1577 	unsigned int flags;
1578 	const char *str;
1579 	const char *name;
1580 } lock_type_table[] = {
1581 	{ 0,				"semaphore",	"semaphore" },
1582 	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
1583 	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
1584 	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
1585 	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
1586 	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
1587 	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
1588 	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
1589 	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
1590 	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
1591 	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
1592 	{ LCB_F_MUTEX,			"mutex",	"mutex" },
1593 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
1594 	/* alias for get_type_flag() */
1595 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex-spin",	"mutex" },
1596 };
1597 
get_type_str(unsigned int flags)1598 static const char *get_type_str(unsigned int flags)
1599 {
1600 	flags &= LCB_F_MAX_FLAGS - 1;
1601 
1602 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1603 		if (lock_type_table[i].flags == flags)
1604 			return lock_type_table[i].str;
1605 	}
1606 	return "unknown";
1607 }
1608 
get_type_name(unsigned int flags)1609 static const char *get_type_name(unsigned int flags)
1610 {
1611 	flags &= LCB_F_MAX_FLAGS - 1;
1612 
1613 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1614 		if (lock_type_table[i].flags == flags)
1615 			return lock_type_table[i].name;
1616 	}
1617 	return "unknown";
1618 }
1619 
get_type_flag(const char * str)1620 static unsigned int get_type_flag(const char *str)
1621 {
1622 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1623 		if (!strcmp(lock_type_table[i].name, str))
1624 			return lock_type_table[i].flags;
1625 	}
1626 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1627 		if (!strcmp(lock_type_table[i].str, str))
1628 			return lock_type_table[i].flags;
1629 	}
1630 	return UINT_MAX;
1631 }
1632 
lock_filter_finish(void)1633 static void lock_filter_finish(void)
1634 {
1635 	zfree(&filters.types);
1636 	filters.nr_types = 0;
1637 
1638 	zfree(&filters.addrs);
1639 	filters.nr_addrs = 0;
1640 
1641 	for (int i = 0; i < filters.nr_syms; i++)
1642 		free(filters.syms[i]);
1643 
1644 	zfree(&filters.syms);
1645 	filters.nr_syms = 0;
1646 }
1647 
sort_contention_result(void)1648 static void sort_contention_result(void)
1649 {
1650 	sort_result();
1651 }
1652 
print_header_stdio(void)1653 static void print_header_stdio(void)
1654 {
1655 	struct lock_key *key;
1656 
1657 	list_for_each_entry(key, &lock_keys, list)
1658 		fprintf(lock_output, "%*s ", key->len, key->header);
1659 
1660 	switch (aggr_mode) {
1661 	case LOCK_AGGR_TASK:
1662 		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1663 			show_lock_owner ? "owner" : "comm");
1664 		break;
1665 	case LOCK_AGGR_CALLER:
1666 		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1667 		break;
1668 	case LOCK_AGGR_ADDR:
1669 		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1670 		break;
1671 	default:
1672 		break;
1673 	}
1674 }
1675 
print_header_csv(const char * sep)1676 static void print_header_csv(const char *sep)
1677 {
1678 	struct lock_key *key;
1679 
1680 	fprintf(lock_output, "# output: ");
1681 	list_for_each_entry(key, &lock_keys, list)
1682 		fprintf(lock_output, "%s%s ", key->header, sep);
1683 
1684 	switch (aggr_mode) {
1685 	case LOCK_AGGR_TASK:
1686 		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1687 			show_lock_owner ? "owner" : "comm");
1688 		break;
1689 	case LOCK_AGGR_CALLER:
1690 		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1691 		if (verbose > 0)
1692 			fprintf(lock_output, "%s %s", sep, "stacktrace");
1693 		fprintf(lock_output, "\n");
1694 		break;
1695 	case LOCK_AGGR_ADDR:
1696 		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1697 		break;
1698 	default:
1699 		break;
1700 	}
1701 }
1702 
print_header(void)1703 static void print_header(void)
1704 {
1705 	if (!quiet) {
1706 		if (symbol_conf.field_sep)
1707 			print_header_csv(symbol_conf.field_sep);
1708 		else
1709 			print_header_stdio();
1710 	}
1711 }
1712 
print_lock_stat_stdio(struct lock_contention * con,struct lock_stat * st)1713 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1714 {
1715 	struct lock_key *key;
1716 	struct thread *t;
1717 	int pid;
1718 
1719 	list_for_each_entry(key, &lock_keys, list) {
1720 		key->print(key, st);
1721 		fprintf(lock_output, " ");
1722 	}
1723 
1724 	switch (aggr_mode) {
1725 	case LOCK_AGGR_CALLER:
1726 		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
1727 		break;
1728 	case LOCK_AGGR_TASK:
1729 		pid = st->addr;
1730 		t = perf_session__findnew(session, pid);
1731 		fprintf(lock_output, "  %10d   %s\n",
1732 			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1733 		break;
1734 	case LOCK_AGGR_ADDR:
1735 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1736 			st->name, get_type_name(st->flags));
1737 		break;
1738 	default:
1739 		break;
1740 	}
1741 
1742 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1743 		struct map *kmap;
1744 		struct symbol *sym;
1745 		char buf[128];
1746 		u64 ip;
1747 
1748 		for (int i = 0; i < max_stack_depth; i++) {
1749 			if (!st->callstack || !st->callstack[i])
1750 				break;
1751 
1752 			ip = st->callstack[i];
1753 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1754 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1755 			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1756 		}
1757 	}
1758 }
1759 
print_lock_stat_csv(struct lock_contention * con,struct lock_stat * st,const char * sep)1760 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1761 				const char *sep)
1762 {
1763 	struct lock_key *key;
1764 	struct thread *t;
1765 	int pid;
1766 
1767 	list_for_each_entry(key, &lock_keys, list) {
1768 		key->print(key, st);
1769 		fprintf(lock_output, "%s ", sep);
1770 	}
1771 
1772 	switch (aggr_mode) {
1773 	case LOCK_AGGR_CALLER:
1774 		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
1775 		if (verbose <= 0)
1776 			fprintf(lock_output, "\n");
1777 		break;
1778 	case LOCK_AGGR_TASK:
1779 		pid = st->addr;
1780 		t = perf_session__findnew(session, pid);
1781 		fprintf(lock_output, "%d%s %s\n", pid, sep,
1782 			pid == -1 ? "Unknown" : thread__comm_str(t));
1783 		break;
1784 	case LOCK_AGGR_ADDR:
1785 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1786 			st->name, sep, get_type_name(st->flags));
1787 		break;
1788 	default:
1789 		break;
1790 	}
1791 
1792 	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1793 		struct map *kmap;
1794 		struct symbol *sym;
1795 		char buf[128];
1796 		u64 ip;
1797 
1798 		for (int i = 0; i < max_stack_depth; i++) {
1799 			if (!st->callstack || !st->callstack[i])
1800 				break;
1801 
1802 			ip = st->callstack[i];
1803 			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1804 			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1805 			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1806 		}
1807 		fprintf(lock_output, "\n");
1808 	}
1809 }
1810 
print_lock_stat(struct lock_contention * con,struct lock_stat * st)1811 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1812 {
1813 	if (symbol_conf.field_sep)
1814 		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1815 	else
1816 		print_lock_stat_stdio(con, st);
1817 }
1818 
print_footer_stdio(int total,int bad,struct lock_contention_fails * fails)1819 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1820 {
1821 	/* Output for debug, this have to be removed */
1822 	int broken = fails->task + fails->stack + fails->time + fails->data;
1823 
1824 	if (!use_bpf)
1825 		print_bad_events(bad, total);
1826 
1827 	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1828 		return;
1829 
1830 	total += broken;
1831 	fprintf(lock_output, "\n=== output for debug ===\n\n");
1832 	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1833 	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1834 
1835 	fprintf(lock_output, "histogram of failure reasons\n");
1836 	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1837 	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1838 	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1839 	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1840 }
1841 
print_footer_csv(int total,int bad,struct lock_contention_fails * fails,const char * sep)1842 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1843 			     const char *sep)
1844 {
1845 	/* Output for debug, this have to be removed */
1846 	if (use_bpf)
1847 		bad = fails->task + fails->stack + fails->time + fails->data;
1848 
1849 	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1850 		return;
1851 
1852 	total += bad;
1853 	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1854 
1855 	if (use_bpf) {
1856 		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1857 		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1858 		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1859 		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1860 	} else {
1861 		int i;
1862 		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1863 
1864 		for (i = 0; i < BROKEN_MAX; i++)
1865 			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1866 	}
1867 	fprintf(lock_output, "\n");
1868 }
1869 
print_footer(int total,int bad,struct lock_contention_fails * fails)1870 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1871 {
1872 	if (symbol_conf.field_sep)
1873 		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1874 	else
1875 		print_footer_stdio(total, bad, fails);
1876 }
1877 
print_contention_result(struct lock_contention * con)1878 static void print_contention_result(struct lock_contention *con)
1879 {
1880 	struct lock_stat *st;
1881 	int bad, total, printed;
1882 
1883 	if (!quiet)
1884 		print_header();
1885 
1886 	bad = total = printed = 0;
1887 
1888 	while ((st = pop_from_result())) {
1889 		total += use_bpf ? st->nr_contended : 1;
1890 		if (st->broken)
1891 			bad++;
1892 
1893 		if (!st->wait_time_total)
1894 			continue;
1895 
1896 		print_lock_stat(con, st);
1897 
1898 		if (++printed >= print_nr_entries)
1899 			break;
1900 	}
1901 
1902 	if (print_nr_entries) {
1903 		/* update the total/bad stats */
1904 		while ((st = pop_from_result())) {
1905 			total += use_bpf ? st->nr_contended : 1;
1906 			if (st->broken)
1907 				bad++;
1908 		}
1909 	}
1910 	/* some entries are collected but hidden by the callstack filter */
1911 	total += con->nr_filtered;
1912 
1913 	print_footer(total, bad, &con->fails);
1914 }
1915 
1916 static bool force;
1917 
__cmd_report(bool display_info)1918 static int __cmd_report(bool display_info)
1919 {
1920 	int err = -EINVAL;
1921 	struct perf_tool eops = {
1922 		.attr		 = perf_event__process_attr,
1923 		.event_update	 = process_event_update,
1924 		.sample		 = process_sample_event,
1925 		.comm		 = perf_event__process_comm,
1926 		.mmap		 = perf_event__process_mmap,
1927 		.namespaces	 = perf_event__process_namespaces,
1928 		.tracing_data	 = perf_event__process_tracing_data,
1929 		.ordered_events	 = true,
1930 	};
1931 	struct perf_data data = {
1932 		.path  = input_name,
1933 		.mode  = PERF_DATA_MODE_READ,
1934 		.force = force,
1935 	};
1936 
1937 	session = perf_session__new(&data, &eops);
1938 	if (IS_ERR(session)) {
1939 		pr_err("Initializing perf session failed\n");
1940 		return PTR_ERR(session);
1941 	}
1942 
1943 	symbol_conf.allow_aliases = true;
1944 	symbol__init(&session->header.env);
1945 
1946 	if (!data.is_pipe) {
1947 		if (!perf_session__has_traces(session, "lock record"))
1948 			goto out_delete;
1949 
1950 		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1951 			pr_err("Initializing perf session tracepoint handlers failed\n");
1952 			goto out_delete;
1953 		}
1954 
1955 		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1956 			pr_err("Initializing perf session tracepoint handlers failed\n");
1957 			goto out_delete;
1958 		}
1959 	}
1960 
1961 	if (setup_output_field(false, output_fields))
1962 		goto out_delete;
1963 
1964 	if (select_key(false))
1965 		goto out_delete;
1966 
1967 	if (show_thread_stats)
1968 		aggr_mode = LOCK_AGGR_TASK;
1969 
1970 	err = perf_session__process_events(session);
1971 	if (err)
1972 		goto out_delete;
1973 
1974 	setup_pager();
1975 	if (display_info) /* used for info subcommand */
1976 		err = dump_info();
1977 	else {
1978 		combine_result();
1979 		sort_result();
1980 		print_result();
1981 	}
1982 
1983 out_delete:
1984 	perf_session__delete(session);
1985 	return err;
1986 }
1987 
sighandler(int sig __maybe_unused)1988 static void sighandler(int sig __maybe_unused)
1989 {
1990 }
1991 
check_lock_contention_options(const struct option * options,const char * const * usage)1992 static int check_lock_contention_options(const struct option *options,
1993 					 const char * const *usage)
1994 
1995 {
1996 	if (show_thread_stats && show_lock_addrs) {
1997 		pr_err("Cannot use thread and addr mode together\n");
1998 		parse_options_usage(usage, options, "threads", 0);
1999 		parse_options_usage(NULL, options, "lock-addr", 0);
2000 		return -1;
2001 	}
2002 
2003 	if (show_lock_owner && !use_bpf) {
2004 		pr_err("Lock owners are available only with BPF\n");
2005 		parse_options_usage(usage, options, "lock-owner", 0);
2006 		parse_options_usage(NULL, options, "use-bpf", 0);
2007 		return -1;
2008 	}
2009 
2010 	if (show_lock_owner && show_lock_addrs) {
2011 		pr_err("Cannot use owner and addr mode together\n");
2012 		parse_options_usage(usage, options, "lock-owner", 0);
2013 		parse_options_usage(NULL, options, "lock-addr", 0);
2014 		return -1;
2015 	}
2016 
2017 	if (symbol_conf.field_sep) {
2018 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
2019 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
2020 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
2021 			pr_err("Cannot use the separator that is already used\n");
2022 			parse_options_usage(usage, options, "x", 1);
2023 			return -1;
2024 		}
2025 	}
2026 
2027 	if (show_lock_owner)
2028 		show_thread_stats = true;
2029 
2030 	return 0;
2031 }
2032 
__cmd_contention(int argc,const char ** argv)2033 static int __cmd_contention(int argc, const char **argv)
2034 {
2035 	int err = -EINVAL;
2036 	struct perf_tool eops = {
2037 		.attr		 = perf_event__process_attr,
2038 		.event_update	 = process_event_update,
2039 		.sample		 = process_sample_event,
2040 		.comm		 = perf_event__process_comm,
2041 		.mmap		 = perf_event__process_mmap,
2042 		.tracing_data	 = perf_event__process_tracing_data,
2043 		.ordered_events	 = true,
2044 	};
2045 	struct perf_data data = {
2046 		.path  = input_name,
2047 		.mode  = PERF_DATA_MODE_READ,
2048 		.force = force,
2049 	};
2050 	struct lock_contention con = {
2051 		.target = &target,
2052 		.map_nr_entries = bpf_map_entries,
2053 		.max_stack = max_stack_depth,
2054 		.stack_skip = stack_skip,
2055 		.filters = &filters,
2056 		.save_callstack = needs_callstack(),
2057 		.owner = show_lock_owner,
2058 	};
2059 
2060 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2061 	if (!lockhash_table)
2062 		return -ENOMEM;
2063 
2064 	con.result = &lockhash_table[0];
2065 
2066 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
2067 	if (IS_ERR(session)) {
2068 		pr_err("Initializing perf session failed\n");
2069 		err = PTR_ERR(session);
2070 		session = NULL;
2071 		goto out_delete;
2072 	}
2073 
2074 	con.machine = &session->machines.host;
2075 
2076 	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2077 		show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;
2078 
2079 	if (con.aggr_mode == LOCK_AGGR_CALLER)
2080 		con.save_callstack = true;
2081 
2082 	symbol_conf.allow_aliases = true;
2083 	symbol__init(&session->header.env);
2084 
2085 	if (use_bpf) {
2086 		err = target__validate(&target);
2087 		if (err) {
2088 			char errbuf[512];
2089 
2090 			target__strerror(&target, err, errbuf, 512);
2091 			pr_err("%s\n", errbuf);
2092 			goto out_delete;
2093 		}
2094 
2095 		signal(SIGINT, sighandler);
2096 		signal(SIGCHLD, sighandler);
2097 		signal(SIGTERM, sighandler);
2098 
2099 		con.evlist = evlist__new();
2100 		if (con.evlist == NULL) {
2101 			err = -ENOMEM;
2102 			goto out_delete;
2103 		}
2104 
2105 		err = evlist__create_maps(con.evlist, &target);
2106 		if (err < 0)
2107 			goto out_delete;
2108 
2109 		if (argc) {
2110 			err = evlist__prepare_workload(con.evlist, &target,
2111 						       argv, false, NULL);
2112 			if (err < 0)
2113 				goto out_delete;
2114 		}
2115 
2116 		if (lock_contention_prepare(&con) < 0) {
2117 			pr_err("lock contention BPF setup failed\n");
2118 			goto out_delete;
2119 		}
2120 	} else if (!data.is_pipe) {
2121 		if (!perf_session__has_traces(session, "lock record"))
2122 			goto out_delete;
2123 
2124 		if (!evlist__find_evsel_by_str(session->evlist,
2125 					       "lock:contention_begin")) {
2126 			pr_err("lock contention evsel not found\n");
2127 			goto out_delete;
2128 		}
2129 
2130 		if (perf_session__set_tracepoints_handlers(session,
2131 						contention_tracepoints)) {
2132 			pr_err("Initializing perf session tracepoint handlers failed\n");
2133 			goto out_delete;
2134 		}
2135 	}
2136 
2137 	if (setup_output_field(true, output_fields))
2138 		goto out_delete;
2139 
2140 	if (select_key(true))
2141 		goto out_delete;
2142 
2143 	if (symbol_conf.field_sep) {
2144 		int i;
2145 		struct lock_key *keys = contention_keys;
2146 
2147 		/* do not align output in CSV format */
2148 		for (i = 0; keys[i].name; i++)
2149 			keys[i].len = 0;
2150 	}
2151 
2152 	if (use_bpf) {
2153 		lock_contention_start();
2154 		if (argc)
2155 			evlist__start_workload(con.evlist);
2156 
2157 		/* wait for signal */
2158 		pause();
2159 
2160 		lock_contention_stop();
2161 		lock_contention_read(&con);
2162 	} else {
2163 		err = perf_session__process_events(session);
2164 		if (err)
2165 			goto out_delete;
2166 	}
2167 
2168 	setup_pager();
2169 
2170 	sort_contention_result();
2171 	print_contention_result(&con);
2172 
2173 out_delete:
2174 	lock_filter_finish();
2175 	evlist__delete(con.evlist);
2176 	lock_contention_finish();
2177 	perf_session__delete(session);
2178 	zfree(&lockhash_table);
2179 	return err;
2180 }
2181 
2182 
__cmd_record(int argc,const char ** argv)2183 static int __cmd_record(int argc, const char **argv)
2184 {
2185 	const char *record_args[] = {
2186 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2187 	};
2188 	const char *callgraph_args[] = {
2189 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2190 	};
2191 	unsigned int rec_argc, i, j, ret;
2192 	unsigned int nr_tracepoints;
2193 	unsigned int nr_callgraph_args = 0;
2194 	const char **rec_argv;
2195 	bool has_lock_stat = true;
2196 
2197 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2198 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2199 			pr_debug("tracepoint %s is not enabled. "
2200 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2201 				 lock_tracepoints[i].name);
2202 			has_lock_stat = false;
2203 			break;
2204 		}
2205 	}
2206 
2207 	if (has_lock_stat)
2208 		goto setup_args;
2209 
2210 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2211 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2212 			pr_err("tracepoint %s is not enabled.\n",
2213 			       contention_tracepoints[i].name);
2214 			return 1;
2215 		}
2216 	}
2217 
2218 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2219 
2220 setup_args:
2221 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2222 
2223 	if (has_lock_stat)
2224 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2225 	else
2226 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2227 
2228 	/* factor of 2 is for -e in front of each tracepoint */
2229 	rec_argc += 2 * nr_tracepoints;
2230 
2231 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2232 	if (!rec_argv)
2233 		return -ENOMEM;
2234 
2235 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2236 		rec_argv[i] = strdup(record_args[i]);
2237 
2238 	for (j = 0; j < nr_tracepoints; j++) {
2239 		const char *ev_name;
2240 
2241 		if (has_lock_stat)
2242 			ev_name = strdup(lock_tracepoints[j].name);
2243 		else
2244 			ev_name = strdup(contention_tracepoints[j].name);
2245 
2246 		if (!ev_name)
2247 			return -ENOMEM;
2248 
2249 		rec_argv[i++] = "-e";
2250 		rec_argv[i++] = ev_name;
2251 	}
2252 
2253 	for (j = 0; j < nr_callgraph_args; j++, i++)
2254 		rec_argv[i] = callgraph_args[j];
2255 
2256 	for (j = 1; j < (unsigned int)argc; j++, i++)
2257 		rec_argv[i] = argv[j];
2258 
2259 	BUG_ON(i != rec_argc);
2260 
2261 	ret = cmd_record(i, rec_argv);
2262 	free(rec_argv);
2263 	return ret;
2264 }
2265 
parse_map_entry(const struct option * opt,const char * str,int unset __maybe_unused)2266 static int parse_map_entry(const struct option *opt, const char *str,
2267 			    int unset __maybe_unused)
2268 {
2269 	unsigned long *len = (unsigned long *)opt->value;
2270 	unsigned long val;
2271 	char *endptr;
2272 
2273 	errno = 0;
2274 	val = strtoul(str, &endptr, 0);
2275 	if (*endptr != '\0' || errno != 0) {
2276 		pr_err("invalid BPF map length: %s\n", str);
2277 		return -1;
2278 	}
2279 
2280 	*len = val;
2281 	return 0;
2282 }
2283 
parse_max_stack(const struct option * opt,const char * str,int unset __maybe_unused)2284 static int parse_max_stack(const struct option *opt, const char *str,
2285 			   int unset __maybe_unused)
2286 {
2287 	unsigned long *len = (unsigned long *)opt->value;
2288 	long val;
2289 	char *endptr;
2290 
2291 	errno = 0;
2292 	val = strtol(str, &endptr, 0);
2293 	if (*endptr != '\0' || errno != 0) {
2294 		pr_err("invalid max stack depth: %s\n", str);
2295 		return -1;
2296 	}
2297 
2298 	if (val < 0 || val > sysctl__max_stack()) {
2299 		pr_err("invalid max stack depth: %ld\n", val);
2300 		return -1;
2301 	}
2302 
2303 	*len = val;
2304 	return 0;
2305 }
2306 
add_lock_type(unsigned int flags)2307 static bool add_lock_type(unsigned int flags)
2308 {
2309 	unsigned int *tmp;
2310 
2311 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2312 	if (tmp == NULL)
2313 		return false;
2314 
2315 	tmp[filters.nr_types++] = flags;
2316 	filters.types = tmp;
2317 	return true;
2318 }
2319 
parse_lock_type(const struct option * opt __maybe_unused,const char * str,int unset __maybe_unused)2320 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2321 			   int unset __maybe_unused)
2322 {
2323 	char *s, *tmp, *tok;
2324 	int ret = 0;
2325 
2326 	s = strdup(str);
2327 	if (s == NULL)
2328 		return -1;
2329 
2330 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2331 		unsigned int flags = get_type_flag(tok);
2332 
2333 		if (flags == -1U) {
2334 			pr_err("Unknown lock flags: %s\n", tok);
2335 			ret = -1;
2336 			break;
2337 		}
2338 
2339 		if (!add_lock_type(flags)) {
2340 			ret = -1;
2341 			break;
2342 		}
2343 	}
2344 
2345 	free(s);
2346 	return ret;
2347 }
2348 
add_lock_addr(unsigned long addr)2349 static bool add_lock_addr(unsigned long addr)
2350 {
2351 	unsigned long *tmp;
2352 
2353 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2354 	if (tmp == NULL) {
2355 		pr_err("Memory allocation failure\n");
2356 		return false;
2357 	}
2358 
2359 	tmp[filters.nr_addrs++] = addr;
2360 	filters.addrs = tmp;
2361 	return true;
2362 }
2363 
add_lock_sym(char * name)2364 static bool add_lock_sym(char *name)
2365 {
2366 	char **tmp;
2367 	char *sym = strdup(name);
2368 
2369 	if (sym == NULL) {
2370 		pr_err("Memory allocation failure\n");
2371 		return false;
2372 	}
2373 
2374 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2375 	if (tmp == NULL) {
2376 		pr_err("Memory allocation failure\n");
2377 		free(sym);
2378 		return false;
2379 	}
2380 
2381 	tmp[filters.nr_syms++] = sym;
2382 	filters.syms = tmp;
2383 	return true;
2384 }
2385 
parse_lock_addr(const struct option * opt __maybe_unused,const char * str,int unset __maybe_unused)2386 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2387 			   int unset __maybe_unused)
2388 {
2389 	char *s, *tmp, *tok;
2390 	int ret = 0;
2391 	u64 addr;
2392 
2393 	s = strdup(str);
2394 	if (s == NULL)
2395 		return -1;
2396 
2397 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2398 		char *end;
2399 
2400 		addr = strtoul(tok, &end, 16);
2401 		if (*end == '\0') {
2402 			if (!add_lock_addr(addr)) {
2403 				ret = -1;
2404 				break;
2405 			}
2406 			continue;
2407 		}
2408 
2409 		/*
2410 		 * At this moment, we don't have kernel symbols.  Save the symbols
2411 		 * in a separate list and resolve them to addresses later.
2412 		 */
2413 		if (!add_lock_sym(tok)) {
2414 			ret = -1;
2415 			break;
2416 		}
2417 	}
2418 
2419 	free(s);
2420 	return ret;
2421 }
2422 
parse_call_stack(const struct option * opt __maybe_unused,const char * str,int unset __maybe_unused)2423 static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
2424 			   int unset __maybe_unused)
2425 {
2426 	char *s, *tmp, *tok;
2427 	int ret = 0;
2428 
2429 	s = strdup(str);
2430 	if (s == NULL)
2431 		return -1;
2432 
2433 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2434 		struct callstack_filter *entry;
2435 
2436 		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
2437 		if (entry == NULL) {
2438 			pr_err("Memory allocation failure\n");
2439 			return -1;
2440 		}
2441 
2442 		strcpy(entry->name, tok);
2443 		list_add_tail(&entry->list, &callstack_filters);
2444 	}
2445 
2446 	free(s);
2447 	return ret;
2448 }
2449 
parse_output(const struct option * opt __maybe_unused,const char * str,int unset __maybe_unused)2450 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2451 			int unset __maybe_unused)
2452 {
2453 	const char **name = (const char **)opt->value;
2454 
2455 	if (str == NULL)
2456 		return -1;
2457 
2458 	lock_output = fopen(str, "w");
2459 	if (lock_output == NULL) {
2460 		pr_err("Cannot open %s\n", str);
2461 		return -1;
2462 	}
2463 
2464 	*name = str;
2465 	return 0;
2466 }
2467 
cmd_lock(int argc,const char ** argv)2468 int cmd_lock(int argc, const char **argv)
2469 {
2470 	const struct option lock_options[] = {
2471 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2472 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2473 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2474 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2475 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2476 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2477 		   "file", "vmlinux pathname"),
2478 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2479 		   "file", "kallsyms pathname"),
2480 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2481 	OPT_END()
2482 	};
2483 
2484 	const struct option info_options[] = {
2485 	OPT_BOOLEAN('t', "threads", &info_threads,
2486 		    "dump thread list in perf.data"),
2487 	OPT_BOOLEAN('m', "map", &info_map,
2488 		    "map of lock instances (address:name table)"),
2489 	OPT_PARENT(lock_options)
2490 	};
2491 
2492 	const struct option report_options[] = {
2493 	OPT_STRING('k', "key", &sort_key, "acquired",
2494 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2495 	OPT_STRING('F', "field", &output_fields, NULL,
2496 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2497 	/* TODO: type */
2498 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2499 		    "combine locks in the same class"),
2500 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2501 		    "show per-thread lock stats"),
2502 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2503 	OPT_PARENT(lock_options)
2504 	};
2505 
2506 	struct option contention_options[] = {
2507 	OPT_STRING('k', "key", &sort_key, "wait_total",
2508 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2509 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2510 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2511 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2512 		    "show per-thread lock stats"),
2513 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2514 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2515 		    "System-wide collection from all CPUs"),
2516 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2517 		    "List of cpus to monitor"),
2518 	OPT_STRING('p', "pid", &target.pid, "pid",
2519 		   "Trace on existing process id"),
2520 	OPT_STRING(0, "tid", &target.tid, "tid",
2521 		   "Trace on existing thread id (exclusive to --pid)"),
2522 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2523 		     "Max number of BPF map entries", parse_map_entry),
2524 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2525 		     "Set the maximum stack depth when collecting lock contention, "
2526 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2527 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2528 		    "Set the number of stack depth to skip when finding a lock caller, "
2529 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2530 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2531 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2532 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2533 		     "Filter specific type of locks", parse_lock_type),
2534 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2535 		     "Filter specific address/symbol of locks", parse_lock_addr),
2536 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2537 		     "Filter specific function in the callstack", parse_call_stack),
2538 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2539 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2540 		   "print result in CSV format with custom separator"),
2541 	OPT_PARENT(lock_options)
2542 	};
2543 
2544 	const char * const info_usage[] = {
2545 		"perf lock info [<options>]",
2546 		NULL
2547 	};
2548 	const char *const lock_subcommands[] = { "record", "report", "script",
2549 						 "info", "contention", NULL };
2550 	const char *lock_usage[] = {
2551 		NULL,
2552 		NULL
2553 	};
2554 	const char * const report_usage[] = {
2555 		"perf lock report [<options>]",
2556 		NULL
2557 	};
2558 	const char * const contention_usage[] = {
2559 		"perf lock contention [<options>]",
2560 		NULL
2561 	};
2562 	unsigned int i;
2563 	int rc = 0;
2564 
2565 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2566 	if (!lockhash_table)
2567 		return -ENOMEM;
2568 
2569 	for (i = 0; i < LOCKHASH_SIZE; i++)
2570 		INIT_HLIST_HEAD(lockhash_table + i);
2571 
2572 	lock_output = stderr;
2573 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2574 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2575 	if (!argc)
2576 		usage_with_options(lock_usage, lock_options);
2577 
2578 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2579 		return __cmd_record(argc, argv);
2580 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2581 		trace_handler = &report_lock_ops;
2582 		if (argc) {
2583 			argc = parse_options(argc, argv,
2584 					     report_options, report_usage, 0);
2585 			if (argc)
2586 				usage_with_options(report_usage, report_options);
2587 		}
2588 		rc = __cmd_report(false);
2589 	} else if (!strcmp(argv[0], "script")) {
2590 		/* Aliased to 'perf script' */
2591 		rc = cmd_script(argc, argv);
2592 	} else if (!strcmp(argv[0], "info")) {
2593 		if (argc) {
2594 			argc = parse_options(argc, argv,
2595 					     info_options, info_usage, 0);
2596 			if (argc)
2597 				usage_with_options(info_usage, info_options);
2598 		}
2599 		/* recycling report_lock_ops */
2600 		trace_handler = &report_lock_ops;
2601 		rc = __cmd_report(true);
2602 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2603 		trace_handler = &contention_lock_ops;
2604 		sort_key = "wait_total";
2605 		output_fields = "contended,wait_total,wait_max,avg_wait";
2606 
2607 #ifndef HAVE_BPF_SKEL
2608 		set_option_nobuild(contention_options, 'b', "use-bpf",
2609 				   "no BUILD_BPF_SKEL=1", false);
2610 #endif
2611 		if (argc) {
2612 			argc = parse_options(argc, argv, contention_options,
2613 					     contention_usage, 0);
2614 		}
2615 
2616 		if (check_lock_contention_options(contention_options,
2617 						  contention_usage) < 0)
2618 			return -1;
2619 
2620 		rc = __cmd_contention(argc, argv);
2621 	} else {
2622 		usage_with_options(lock_usage, lock_options);
2623 	}
2624 
2625 	zfree(&lockhash_table);
2626 	return rc;
2627 }
2628