1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9 
10 #include <byteswap.h>
11 #include "asm/bug.h"
12 #include "evsel.h"
13 #include "evlist.h"
14 #include "util.h"
15 #include "cpumap.h"
16 #include "thread_map.h"
17 
/*
 * FD(e, x, y): lvalue for the int stored at entry (x, y) of e->fd.
 * The 'e' argument is parenthesised so that any pointer expression can be
 * passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
/* GROUP_FD(group_fd, cpu): lvalue for the int at entry (cpu, 0) of group_fd. */
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
20 
/*
 * Return the number of bytes covered by the bits of @sample_type that are
 * also set in PERF_SAMPLE_MASK, counting one u64 per set bit.
 */
int __perf_evsel__sample_size(u64 sample_type)
{
	u64 bits = sample_type & PERF_SAMPLE_MASK;
	int nr_fields = 0;

	/* Each pass clears the lowest bit still set, so we loop once per bit. */
	while (bits) {
		bits &= bits - 1;
		nr_fields++;
	}

	return nr_fields * sizeof(u64);
}
36 
hists__init(struct hists * hists)37 void hists__init(struct hists *hists)
38 {
39 	memset(hists, 0, sizeof(*hists));
40 	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
41 	hists->entries_in = &hists->entries_in_array[0];
42 	hists->entries_collapsed = RB_ROOT;
43 	hists->entries = RB_ROOT;
44 	pthread_mutex_init(&hists->lock, NULL);
45 }
46 
perf_evsel__init(struct perf_evsel * evsel,struct perf_event_attr * attr,int idx)47 void perf_evsel__init(struct perf_evsel *evsel,
48 		      struct perf_event_attr *attr, int idx)
49 {
50 	evsel->idx	   = idx;
51 	evsel->attr	   = *attr;
52 	INIT_LIST_HEAD(&evsel->node);
53 	hists__init(&evsel->hists);
54 }
55 
perf_evsel__new(struct perf_event_attr * attr,int idx)56 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
57 {
58 	struct perf_evsel *evsel = zalloc(sizeof(*evsel));
59 
60 	if (evsel != NULL)
61 		perf_evsel__init(evsel, attr, idx);
62 
63 	return evsel;
64 }
65 
perf_evsel__config(struct perf_evsel * evsel,struct perf_record_opts * opts,struct perf_evsel * first)66 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
67 			struct perf_evsel *first)
68 {
69 	struct perf_event_attr *attr = &evsel->attr;
70 	int track = !evsel->idx; /* only the first counter needs these */
71 
72 	attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
73 	attr->inherit	    = !opts->no_inherit;
74 	attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
75 			      PERF_FORMAT_TOTAL_TIME_RUNNING |
76 			      PERF_FORMAT_ID;
77 
78 	attr->sample_type  |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
79 
80 	/*
81 	 * We default some events to a 1 default interval. But keep
82 	 * it a weak assumption overridable by the user.
83 	 */
84 	if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
85 				     opts->user_interval != ULLONG_MAX)) {
86 		if (opts->freq) {
87 			attr->sample_type	|= PERF_SAMPLE_PERIOD;
88 			attr->freq		= 1;
89 			attr->sample_freq	= opts->freq;
90 		} else {
91 			attr->sample_period = opts->default_interval;
92 		}
93 	}
94 
95 	if (opts->no_samples)
96 		attr->sample_freq = 0;
97 
98 	if (opts->inherit_stat)
99 		attr->inherit_stat = 1;
100 
101 	if (opts->sample_address) {
102 		attr->sample_type	|= PERF_SAMPLE_ADDR;
103 		attr->mmap_data = track;
104 	}
105 
106 	if (opts->call_graph)
107 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
108 
109 	if (opts->system_wide)
110 		attr->sample_type	|= PERF_SAMPLE_CPU;
111 
112 	if (opts->period)
113 		attr->sample_type	|= PERF_SAMPLE_PERIOD;
114 
115 	if (!opts->sample_id_all_missing &&
116 	    (opts->sample_time || opts->system_wide ||
117 	     !opts->no_inherit || opts->cpu_list))
118 		attr->sample_type	|= PERF_SAMPLE_TIME;
119 
120 	if (opts->raw_samples) {
121 		attr->sample_type	|= PERF_SAMPLE_TIME;
122 		attr->sample_type	|= PERF_SAMPLE_RAW;
123 		attr->sample_type	|= PERF_SAMPLE_CPU;
124 	}
125 
126 	if (opts->no_delay) {
127 		attr->watermark = 0;
128 		attr->wakeup_events = 1;
129 	}
130 	if (opts->branch_stack) {
131 		attr->sample_type	|= PERF_SAMPLE_BRANCH_STACK;
132 		attr->branch_sample_type = opts->branch_stack;
133 	}
134 
135 	attr->mmap = track;
136 	attr->comm = track;
137 
138 	if (!opts->target_pid && !opts->target_tid && !opts->system_wide &&
139 	    (!opts->group || evsel == first)) {
140 		attr->disabled = 1;
141 		attr->enable_on_exec = 1;
142 	}
143 }
144 
perf_evsel__alloc_fd(struct perf_evsel * evsel,int ncpus,int nthreads)145 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
146 {
147 	int cpu, thread;
148 	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
149 
150 	if (evsel->fd) {
151 		for (cpu = 0; cpu < ncpus; cpu++) {
152 			for (thread = 0; thread < nthreads; thread++) {
153 				FD(evsel, cpu, thread) = -1;
154 			}
155 		}
156 	}
157 
158 	return evsel->fd != NULL ? 0 : -ENOMEM;
159 }
160 
perf_evsel__alloc_id(struct perf_evsel * evsel,int ncpus,int nthreads)161 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
162 {
163 	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
164 	if (evsel->sample_id == NULL)
165 		return -ENOMEM;
166 
167 	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
168 	if (evsel->id == NULL) {
169 		xyarray__delete(evsel->sample_id);
170 		evsel->sample_id = NULL;
171 		return -ENOMEM;
172 	}
173 
174 	return 0;
175 }
176 
perf_evsel__alloc_counts(struct perf_evsel * evsel,int ncpus)177 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
178 {
179 	evsel->counts = zalloc((sizeof(*evsel->counts) +
180 				(ncpus * sizeof(struct perf_counts_values))));
181 	return evsel->counts != NULL ? 0 : -ENOMEM;
182 }
183 
perf_evsel__free_fd(struct perf_evsel * evsel)184 void perf_evsel__free_fd(struct perf_evsel *evsel)
185 {
186 	xyarray__delete(evsel->fd);
187 	evsel->fd = NULL;
188 }
189 
perf_evsel__free_id(struct perf_evsel * evsel)190 void perf_evsel__free_id(struct perf_evsel *evsel)
191 {
192 	xyarray__delete(evsel->sample_id);
193 	evsel->sample_id = NULL;
194 	free(evsel->id);
195 	evsel->id = NULL;
196 }
197 
perf_evsel__close_fd(struct perf_evsel * evsel,int ncpus,int nthreads)198 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
199 {
200 	int cpu, thread;
201 
202 	for (cpu = 0; cpu < ncpus; cpu++)
203 		for (thread = 0; thread < nthreads; ++thread) {
204 			close(FD(evsel, cpu, thread));
205 			FD(evsel, cpu, thread) = -1;
206 		}
207 }
208 
perf_evsel__exit(struct perf_evsel * evsel)209 void perf_evsel__exit(struct perf_evsel *evsel)
210 {
211 	assert(list_empty(&evsel->node));
212 	xyarray__delete(evsel->fd);
213 	xyarray__delete(evsel->sample_id);
214 	free(evsel->id);
215 }
216 
perf_evsel__delete(struct perf_evsel * evsel)217 void perf_evsel__delete(struct perf_evsel *evsel)
218 {
219 	perf_evsel__exit(evsel);
220 	close_cgroup(evsel->cgrp);
221 	free(evsel->name);
222 	free(evsel);
223 }
224 
__perf_evsel__read_on_cpu(struct perf_evsel * evsel,int cpu,int thread,bool scale)225 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
226 			      int cpu, int thread, bool scale)
227 {
228 	struct perf_counts_values count;
229 	size_t nv = scale ? 3 : 1;
230 
231 	if (FD(evsel, cpu, thread) < 0)
232 		return -EINVAL;
233 
234 	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
235 		return -ENOMEM;
236 
237 	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
238 		return -errno;
239 
240 	if (scale) {
241 		if (count.run == 0)
242 			count.val = 0;
243 		else if (count.run < count.ena)
244 			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
245 	} else
246 		count.ena = count.run = 0;
247 
248 	evsel->counts->cpu[cpu] = count;
249 	return 0;
250 }
251 
__perf_evsel__read(struct perf_evsel * evsel,int ncpus,int nthreads,bool scale)252 int __perf_evsel__read(struct perf_evsel *evsel,
253 		       int ncpus, int nthreads, bool scale)
254 {
255 	size_t nv = scale ? 3 : 1;
256 	int cpu, thread;
257 	struct perf_counts_values *aggr = &evsel->counts->aggr, count;
258 
259 	aggr->val = aggr->ena = aggr->run = 0;
260 
261 	for (cpu = 0; cpu < ncpus; cpu++) {
262 		for (thread = 0; thread < nthreads; thread++) {
263 			if (FD(evsel, cpu, thread) < 0)
264 				continue;
265 
266 			if (readn(FD(evsel, cpu, thread),
267 				  &count, nv * sizeof(u64)) < 0)
268 				return -errno;
269 
270 			aggr->val += count.val;
271 			if (scale) {
272 				aggr->ena += count.ena;
273 				aggr->run += count.run;
274 			}
275 		}
276 	}
277 
278 	evsel->counts->scaled = 0;
279 	if (scale) {
280 		if (aggr->run == 0) {
281 			evsel->counts->scaled = -1;
282 			aggr->val = 0;
283 			return 0;
284 		}
285 
286 		if (aggr->run < aggr->ena) {
287 			evsel->counts->scaled = 1;
288 			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
289 		}
290 	} else
291 		aggr->ena = aggr->run = 0;
292 
293 	return 0;
294 }
295 
__perf_evsel__open(struct perf_evsel * evsel,struct cpu_map * cpus,struct thread_map * threads,bool group,struct xyarray * group_fds)296 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
297 			      struct thread_map *threads, bool group,
298 			      struct xyarray *group_fds)
299 {
300 	int cpu, thread;
301 	unsigned long flags = 0;
302 	int pid = -1, err;
303 
304 	if (evsel->fd == NULL &&
305 	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
306 		return -ENOMEM;
307 
308 	if (evsel->cgrp) {
309 		flags = PERF_FLAG_PID_CGROUP;
310 		pid = evsel->cgrp->fd;
311 	}
312 
313 	for (cpu = 0; cpu < cpus->nr; cpu++) {
314 		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;
315 
316 		for (thread = 0; thread < threads->nr; thread++) {
317 
318 			if (!evsel->cgrp)
319 				pid = threads->map[thread];
320 
321 			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
322 								     pid,
323 								     cpus->map[cpu],
324 								     group_fd, flags);
325 			if (FD(evsel, cpu, thread) < 0) {
326 				err = -errno;
327 				goto out_close;
328 			}
329 
330 			if (group && group_fd == -1)
331 				group_fd = FD(evsel, cpu, thread);
332 		}
333 	}
334 
335 	return 0;
336 
337 out_close:
338 	do {
339 		while (--thread >= 0) {
340 			close(FD(evsel, cpu, thread));
341 			FD(evsel, cpu, thread) = -1;
342 		}
343 		thread = threads->nr;
344 	} while (--cpu >= 0);
345 	return err;
346 }
347 
perf_evsel__close(struct perf_evsel * evsel,int ncpus,int nthreads)348 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
349 {
350 	if (evsel->fd == NULL)
351 		return;
352 
353 	perf_evsel__close_fd(evsel, ncpus, nthreads);
354 	perf_evsel__free_fd(evsel);
355 	evsel->fd = NULL;
356 }
357 
358 static struct {
359 	struct cpu_map map;
360 	int cpus[1];
361 } empty_cpu_map = {
362 	.map.nr	= 1,
363 	.cpus	= { -1, },
364 };
365 
366 static struct {
367 	struct thread_map map;
368 	int threads[1];
369 } empty_thread_map = {
370 	.map.nr	 = 1,
371 	.threads = { -1, },
372 };
373 
perf_evsel__open(struct perf_evsel * evsel,struct cpu_map * cpus,struct thread_map * threads,bool group,struct xyarray * group_fd)374 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
375 		     struct thread_map *threads, bool group,
376 		     struct xyarray *group_fd)
377 {
378 	if (cpus == NULL) {
379 		/* Work around old compiler warnings about strict aliasing */
380 		cpus = &empty_cpu_map.map;
381 	}
382 
383 	if (threads == NULL)
384 		threads = &empty_thread_map.map;
385 
386 	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
387 }
388 
perf_evsel__open_per_cpu(struct perf_evsel * evsel,struct cpu_map * cpus,bool group,struct xyarray * group_fd)389 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
390 			     struct cpu_map *cpus, bool group,
391 			     struct xyarray *group_fd)
392 {
393 	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
394 				  group_fd);
395 }
396 
perf_evsel__open_per_thread(struct perf_evsel * evsel,struct thread_map * threads,bool group,struct xyarray * group_fd)397 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
398 				struct thread_map *threads, bool group,
399 				struct xyarray *group_fd)
400 {
401 	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
402 				  group_fd);
403 }
404 
/*
 * Extract the sample id fields that trail a non-sample event.
 *
 * The fields are walked backwards from the last u64 of the event, in the
 * order CPU, STREAM_ID, ID, TIME, TID, each one consumed only when its bit
 * is set in @type.  Always returns 0.
 */
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	size_t nr_u64 = (event->header.size - sizeof(event->header)) /
			sizeof(u64);
	const u64 *cursor = event->sample.array;

	/* Start at the last u64 of the event payload. */
	cursor += nr_u64 - 1;

	if (type & PERF_SAMPLE_CPU) {
		/* only the first u32 of this slot is taken as the cpu */
		sample->cpu = *(u32 *)cursor;
		cursor--;
	}

	if (type & PERF_SAMPLE_STREAM_ID)
		sample->stream_id = *cursor--;

	if (type & PERF_SAMPLE_ID)
		sample->id = *cursor--;

	if (type & PERF_SAMPLE_TIME)
		sample->time = *cursor--;

	if (type & PERF_SAMPLE_TID) {
		u32 *ids = (u32 *)cursor;

		sample->pid = ids[0];
		sample->tid = ids[1];
	}

	return 0;
}
442 
/*
 * Return true when the @size bytes starting at @offset extend past the end
 * of @event, i.e. past event->header.size bytes from its start.
 */
static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *event_end = (const void *)event + event->header.size;

	return offset + size > event_end;
}
453 
/*
 * Decode @event into @data according to the sample @type bits.
 *
 * @data is reset first (cpu/pid/tid = -1, stream_id/id/time = -1ULL,
 * period = 1, everything else 0).  Events other than PERF_RECORD_SAMPLE are
 * handed to perf_event__parse_id_sample() when @sample_id_all is set and
 * otherwise left at those defaults.
 *
 * @sample_size is the size of the fixed part of the sample; it is checked
 * against header.size up front.  The variable sized parts (callchain, raw
 * data, branch stack) are each bounds checked against the event before they
 * are dereferenced, and the callchain/branch_stack/raw_data pointers stored
 * in @data point into @event.
 *
 * @swapped tells that the event was byte swapped as u64s, so fields made of
 * two u32s need fixing up.
 *
 * Returns 0 on success, -EFAULT when a field would run past the end of the
 * event, or -1 when PERF_SAMPLE_READ is requested (unsupported).
 */
int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = 1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;

	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	/* addr stays 0 and id stays -1ULL (set above) when not sampled */
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		u64 nr;

		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;
		nr = data->callchain->nr;

		/*
		 * nr counts u64 entries, not bytes: the nr word plus nr
		 * entries must fit in the event.  Rejecting any nr larger
		 * than the event could hold also keeps the multiplication
		 * below from overflowing.
		 */
		if (nr > event->header.size / sizeof(u64) ||
		    sample_overlap(event, array, (1 + nr) * sizeof(u64)))
			return -EFAULT;

		array += 1 + nr;
	}

	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		/* make sure the size word is inside the event before reading it */
		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;

		array = (void *)array + data->raw_size + sizeof(u32);
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		u64 nr, sz;

		/* the nr word itself must be inside the event */
		if (sample_overlap(event, array, sizeof(u64)))
			return -EFAULT;

		data->branch_stack = (struct branch_stack *)array;
		nr = data->branch_stack->nr;
		array++; /* nr */

		/* bound nr first so that the size computation cannot overflow */
		if (nr > event->header.size / sizeof(struct branch_entry))
			return -EFAULT;

		sz = nr * sizeof(struct branch_entry);
		if (sample_overlap(event, array, sz))
			return -EFAULT;

		array += sz / sizeof(u64);
	}
	return 0;
}
599 
/*
 * Write the fields of @sample selected by @type into event->sample.array,
 * in the order IP, TID, TIME, ADDR, ID, STREAM_ID, CPU, PERIOD.  Only these
 * fixed size fields are handled here; other @type bits are ignored and the
 * event header is not touched.
 *
 * With @swapped set, the two-u32 fields (TID, CPU) are written so that the
 * unswapping done by perf_event__parse_sample() yields the original values.
 *
 * Always returns 0.
 */
int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  const struct perf_sample *sample,
				  bool swapped)
{
	u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IP) {
		event->ip.ip = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}

		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		/*
		 * Only the first u32 carries the cpu; clear the other half so
		 * that no uninitialised stack contents end up in the event.
		 */
		u.val32[1] = 0;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	return 0;
}
678