1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016 Facebook
3  */
4 #include <stdio.h>
5 #include <unistd.h>
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <linux/perf_event.h>
10 #include <linux/bpf.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <sys/resource.h>
14 #include <bpf/bpf.h>
15 #include <bpf/libbpf.h>
16 #include "perf-sys.h"
17 #include "trace_helpers.h"
18 
/* Value used for .sample_freq (with .freq = 1) in every perf_event_attr built by test_bpf_perf_event() */
#define SAMPLE_FREQ 50

/* Return value of fork() in main(); err_exit() sends SIGKILL to this pid */
static int pid;
/* counts, stackmap */
static int map_fd[2];
/* Program "bpf_prog1" looked up in main(); attached to every perf event the tests open */
struct bpf_program *prog;
/* Set by print_ksym(), cleared at the start of print_stacks() and checked there after the map walk */
static bool sys_read_seen, sys_write_seen;
26 
/*
 * Print the kernel symbol that ksym_search() returns for @addr, followed by
 * ';', and record whether the symbol belongs to the read or write syscall
 * path.
 *
 * A zero @addr prints nothing. If ksym_search() finds no symbol, a hint is
 * printed instead and the seen flags are left untouched.
 */
static void print_ksym(__u64 addr)
{
	struct ksym *sym;

	if (!addr)
		return;
	sym = ksym_search(addr);
	if (!sym) {
		printf("ksym not found. Is kallsyms loaded?\n");
		return;
	}

	printf("%s;", sym->name);
	/*
	 * strstr() returns non-NULL on a match. Substring matching (rather
	 * than strcmp) lets decorated names that merely contain "sys_read" /
	 * "sys_write" count as well.
	 */
	if (strstr(sym->name, "sys_read"))
		sys_read_seen = true;
	if (strstr(sym->name, "sys_write"))
		sys_write_seen = true;
}
45 
/* Print @addr in hex followed by ';'. A zero address produces no output. */
static void print_addr(__u64 addr)
{
	if (addr)
		printf("%llx;", addr);
}
52 
/*
 * Size of key_t.comm.
 * NOTE(review): presumably mirrors the kernel's TASK_COMM_LEN -- confirm.
 */
#define TASK_COMM_LEN 16

/*
 * Key type of the "counts" map as walked by print_stacks().
 * NOTE(review): the layout presumably has to match the key written by the
 * BPF program in the *_kern.o object -- confirm against that source.
 */
struct key_t {
	/* printed with %s by print_stack() */
	char comm[TASK_COMM_LEN];
	/* key into the stack map; the entry is printed as kernel symbols */
	__u32 kernstack;
	/* key into the stack map; the entry is printed as raw hex addresses */
	__u32 userstack;
};
60 
/*
 * Print one line for a counts-map entry:
 *   "<count> <comm>;<kernel symbols>-;<user addresses>"
 *
 * @key:   entry key; kernstack/userstack are looked up in the stack map
 *         (map_fd[1]).
 * @count: value stored for @key in the counts map.
 *
 * Both stacks are printed from the highest array slot down to slot 0. A
 * failed stack lookup prints "---;" in place of the frames. Lines with a
 * count below 6 end in '\r' instead of '\n'.
 */
static void print_stack(struct key_t *key, __u64 count)
{
	__u64 frames[PERF_MAX_STACK_DEPTH] = {};
	static bool warned;
	int slot;

	printf("%3lld %s;", count, key->comm);

	/* kernel stack, symbolized */
	if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, frames) == 0) {
		slot = PERF_MAX_STACK_DEPTH;
		while (slot-- > 0)
			print_ksym(frames[slot]);
	} else {
		printf("---;");
	}
	printf("-;");

	/* user stack, raw addresses; reuses the same buffer */
	if (bpf_map_lookup_elem(map_fd[1], &key->userstack, frames) == 0) {
		slot = PERF_MAX_STACK_DEPTH;
		while (slot-- > 0)
			print_addr(frames[slot]);
	} else {
		printf("---;");
	}

	if (count >= 6)
		printf("\n");
	else
		printf("\r");

	/* the collision hint is printed at most once per process */
	if (!warned && key->kernstack == -EEXIST) {
		printf("stackmap collisions seen. Consider increasing size\n");
		warned = true;
		return;
	}
	if ((int)key->kernstack < 0 && (int)key->userstack < 0)
		printf("err stackid %d %d\n", key->kernstack, key->userstack);
}
93 
err_exit(int err)94 static void err_exit(int err)
95 {
96 	kill(pid, SIGKILL);
97 	exit(err);
98 }
99 
print_stacks(void)100 static void print_stacks(void)
101 {
102 	struct key_t key = {}, next_key;
103 	__u64 value;
104 	__u32 stackid = 0, next_id;
105 	int error = 1, fd = map_fd[0], stack_map = map_fd[1];
106 
107 	sys_read_seen = sys_write_seen = false;
108 	while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
109 		bpf_map_lookup_elem(fd, &next_key, &value);
110 		print_stack(&next_key, value);
111 		bpf_map_delete_elem(fd, &next_key);
112 		key = next_key;
113 	}
114 	printf("\n");
115 	if (!sys_read_seen || !sys_write_seen) {
116 		printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
117 		err_exit(error);
118 	}
119 
120 	/* clear stack map */
121 	while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) {
122 		bpf_map_delete_elem(stack_map, &next_id);
123 		stackid = next_id;
124 	}
125 }
126 
/*
 * Run dd from /dev/zero to /dev/null through system() to produce read/write
 * activity.
 *
 * Returns 0 unless system() itself returns a negative value, in which case
 * the errno text is printed and -1 is returned.
 */
static inline int generate_load(void)
{
	int rc = system("dd if=/dev/zero of=/dev/null count=5000k status=none");

	if (rc >= 0)
		return 0;

	printf("failed to generate some load with dd: %s\n", strerror(errno));
	return -1;
}
136 
test_perf_event_all_cpu(struct perf_event_attr * attr)137 static void test_perf_event_all_cpu(struct perf_event_attr *attr)
138 {
139 	int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
140 	struct bpf_link **links = calloc(nr_cpus, sizeof(struct bpf_link *));
141 	int i, pmu_fd, error = 1;
142 
143 	if (!links) {
144 		printf("malloc of links failed\n");
145 		goto err;
146 	}
147 
148 	/* system wide perf event, no need to inherit */
149 	attr->inherit = 0;
150 
151 	/* open perf_event on all cpus */
152 	for (i = 0; i < nr_cpus; i++) {
153 		pmu_fd = sys_perf_event_open(attr, -1, i, -1, 0);
154 		if (pmu_fd < 0) {
155 			printf("sys_perf_event_open failed\n");
156 			goto all_cpu_err;
157 		}
158 		links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
159 		if (libbpf_get_error(links[i])) {
160 			printf("bpf_program__attach_perf_event failed\n");
161 			links[i] = NULL;
162 			close(pmu_fd);
163 			goto all_cpu_err;
164 		}
165 	}
166 
167 	if (generate_load() < 0)
168 		goto all_cpu_err;
169 
170 	print_stacks();
171 	error = 0;
172 all_cpu_err:
173 	for (i--; i >= 0; i--)
174 		bpf_link__destroy(links[i]);
175 err:
176 	free(links);
177 	if (error)
178 		err_exit(error);
179 }
180 
test_perf_event_task(struct perf_event_attr * attr)181 static void test_perf_event_task(struct perf_event_attr *attr)
182 {
183 	struct bpf_link *link = NULL;
184 	int pmu_fd, error = 1;
185 
186 	/* per task perf event, enable inherit so the "dd ..." command can be traced properly.
187 	 * Enabling inherit will cause bpf_perf_prog_read_time helper failure.
188 	 */
189 	attr->inherit = 1;
190 
191 	/* open task bound event */
192 	pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
193 	if (pmu_fd < 0) {
194 		printf("sys_perf_event_open failed\n");
195 		goto err;
196 	}
197 	link = bpf_program__attach_perf_event(prog, pmu_fd);
198 	if (libbpf_get_error(link)) {
199 		printf("bpf_program__attach_perf_event failed\n");
200 		link = NULL;
201 		close(pmu_fd);
202 		goto err;
203 	}
204 
205 	if (generate_load() < 0)
206 		goto err;
207 
208 	print_stacks();
209 	error = 0;
210 err:
211 	bpf_link__destroy(link);
212 	if (error)
213 		err_exit(error);
214 }
215 
test_bpf_perf_event(void)216 static void test_bpf_perf_event(void)
217 {
218 	struct perf_event_attr attr_type_hw = {
219 		.sample_freq = SAMPLE_FREQ,
220 		.freq = 1,
221 		.type = PERF_TYPE_HARDWARE,
222 		.config = PERF_COUNT_HW_CPU_CYCLES,
223 	};
224 	struct perf_event_attr attr_type_sw = {
225 		.sample_freq = SAMPLE_FREQ,
226 		.freq = 1,
227 		.type = PERF_TYPE_SOFTWARE,
228 		.config = PERF_COUNT_SW_CPU_CLOCK,
229 	};
230 	struct perf_event_attr attr_hw_cache_l1d = {
231 		.sample_freq = SAMPLE_FREQ,
232 		.freq = 1,
233 		.type = PERF_TYPE_HW_CACHE,
234 		.config =
235 			PERF_COUNT_HW_CACHE_L1D |
236 			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
237 			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
238 	};
239 	struct perf_event_attr attr_hw_cache_branch_miss = {
240 		.sample_freq = SAMPLE_FREQ,
241 		.freq = 1,
242 		.type = PERF_TYPE_HW_CACHE,
243 		.config =
244 			PERF_COUNT_HW_CACHE_BPU |
245 			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
246 			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
247 	};
248 	struct perf_event_attr attr_type_raw = {
249 		.sample_freq = SAMPLE_FREQ,
250 		.freq = 1,
251 		.type = PERF_TYPE_RAW,
252 		/* Intel Instruction Retired */
253 		.config = 0xc0,
254 	};
255 	struct perf_event_attr attr_type_raw_lock_load = {
256 		.sample_freq = SAMPLE_FREQ,
257 		.freq = 1,
258 		.type = PERF_TYPE_RAW,
259 		/* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
260 		.config = 0x21d0,
261 		/* Request to record lock address from PEBS */
262 		.sample_type = PERF_SAMPLE_ADDR,
263 		/* Record address value requires precise event */
264 		.precise_ip = 2,
265 	};
266 
267 	printf("Test HW_CPU_CYCLES\n");
268 	test_perf_event_all_cpu(&attr_type_hw);
269 	test_perf_event_task(&attr_type_hw);
270 
271 	printf("Test SW_CPU_CLOCK\n");
272 	test_perf_event_all_cpu(&attr_type_sw);
273 	test_perf_event_task(&attr_type_sw);
274 
275 	printf("Test HW_CACHE_L1D\n");
276 	test_perf_event_all_cpu(&attr_hw_cache_l1d);
277 	test_perf_event_task(&attr_hw_cache_l1d);
278 
279 	printf("Test HW_CACHE_BPU\n");
280 	test_perf_event_all_cpu(&attr_hw_cache_branch_miss);
281 	test_perf_event_task(&attr_hw_cache_branch_miss);
282 
283 	printf("Test Instruction Retired\n");
284 	test_perf_event_all_cpu(&attr_type_raw);
285 	test_perf_event_task(&attr_type_raw);
286 
287 	printf("Test Lock Load\n");
288 	test_perf_event_all_cpu(&attr_type_raw_lock_load);
289 	test_perf_event_task(&attr_type_raw_lock_load);
290 
291 	printf("*** PASS ***\n");
292 }
293 
294 
main(int argc,char ** argv)295 int main(int argc, char **argv)
296 {
297 	struct bpf_object *obj = NULL;
298 	char filename[256];
299 	int error = 1;
300 
301 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
302 
303 	signal(SIGINT, err_exit);
304 	signal(SIGTERM, err_exit);
305 
306 	if (load_kallsyms()) {
307 		printf("failed to process /proc/kallsyms\n");
308 		goto cleanup;
309 	}
310 
311 	obj = bpf_object__open_file(filename, NULL);
312 	if (libbpf_get_error(obj)) {
313 		printf("opening BPF object file failed\n");
314 		obj = NULL;
315 		goto cleanup;
316 	}
317 
318 	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
319 	if (!prog) {
320 		printf("finding a prog in obj file failed\n");
321 		goto cleanup;
322 	}
323 
324 	/* load BPF program */
325 	if (bpf_object__load(obj)) {
326 		printf("loading BPF object file failed\n");
327 		goto cleanup;
328 	}
329 
330 	map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
331 	map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
332 	if (map_fd[0] < 0 || map_fd[1] < 0) {
333 		printf("finding a counts/stackmap map in obj file failed\n");
334 		goto cleanup;
335 	}
336 
337 	pid = fork();
338 	if (pid == 0) {
339 		read_trace_pipe();
340 		return 0;
341 	} else if (pid == -1) {
342 		printf("couldn't spawn process\n");
343 		goto cleanup;
344 	}
345 
346 	test_bpf_perf_event();
347 	error = 0;
348 
349 cleanup:
350 	bpf_object__close(obj);
351 	err_exit(error);
352 }
353