1 /*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9
10 #include <byteswap.h>
11 #include "asm/bug.h"
12 #include "evsel.h"
13 #include "evlist.h"
14 #include "util.h"
15 #include "cpumap.h"
16 #include "thread_map.h"
17
/*
 * FD() is the int slot at (x, y) of an evsel's fd xyarray; GROUP_FD() is the
 * int slot at (cpu, 0) of a group-fd xyarray.  Both expand to lvalues.
 *
 * The evsel argument is parenthesized so that any pointer expression (not
 * just a plain identifier) can be passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
20
/*
 * Return sizeof(u64) bytes for every bit of @sample_type that is still set
 * after masking with PERF_SAMPLE_MASK.
 */
int __perf_evsel__sample_size(u64 sample_type)
{
	u64 remaining = sample_type & PERF_SAMPLE_MASK;
	int nr_fields = 0;

	/* Each pass clears the lowest set bit, so this runs once per set bit. */
	while (remaining) {
		remaining &= remaining - 1;
		nr_fields++;
	}

	return nr_fields * sizeof(u64);
}
36
hists__init(struct hists * hists)37 void hists__init(struct hists *hists)
38 {
39 memset(hists, 0, sizeof(*hists));
40 hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
41 hists->entries_in = &hists->entries_in_array[0];
42 hists->entries_collapsed = RB_ROOT;
43 hists->entries = RB_ROOT;
44 pthread_mutex_init(&hists->lock, NULL);
45 }
46
perf_evsel__init(struct perf_evsel * evsel,struct perf_event_attr * attr,int idx)47 void perf_evsel__init(struct perf_evsel *evsel,
48 struct perf_event_attr *attr, int idx)
49 {
50 evsel->idx = idx;
51 evsel->attr = *attr;
52 INIT_LIST_HEAD(&evsel->node);
53 hists__init(&evsel->hists);
54 }
55
perf_evsel__new(struct perf_event_attr * attr,int idx)56 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
57 {
58 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
59
60 if (evsel != NULL)
61 perf_evsel__init(evsel, attr, idx);
62
63 return evsel;
64 }
65
perf_evsel__config(struct perf_evsel * evsel,struct perf_record_opts * opts,struct perf_evsel * first)66 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
67 struct perf_evsel *first)
68 {
69 struct perf_event_attr *attr = &evsel->attr;
70 int track = !evsel->idx; /* only the first counter needs these */
71
72 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
73 attr->inherit = !opts->no_inherit;
74 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
75 PERF_FORMAT_TOTAL_TIME_RUNNING |
76 PERF_FORMAT_ID;
77
78 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
79
80 /*
81 * We default some events to a 1 default interval. But keep
82 * it a weak assumption overridable by the user.
83 */
84 if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
85 opts->user_interval != ULLONG_MAX)) {
86 if (opts->freq) {
87 attr->sample_type |= PERF_SAMPLE_PERIOD;
88 attr->freq = 1;
89 attr->sample_freq = opts->freq;
90 } else {
91 attr->sample_period = opts->default_interval;
92 }
93 }
94
95 if (opts->no_samples)
96 attr->sample_freq = 0;
97
98 if (opts->inherit_stat)
99 attr->inherit_stat = 1;
100
101 if (opts->sample_address) {
102 attr->sample_type |= PERF_SAMPLE_ADDR;
103 attr->mmap_data = track;
104 }
105
106 if (opts->call_graph)
107 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
108
109 if (opts->system_wide)
110 attr->sample_type |= PERF_SAMPLE_CPU;
111
112 if (opts->period)
113 attr->sample_type |= PERF_SAMPLE_PERIOD;
114
115 if (!opts->sample_id_all_missing &&
116 (opts->sample_time || opts->system_wide ||
117 !opts->no_inherit || opts->cpu_list))
118 attr->sample_type |= PERF_SAMPLE_TIME;
119
120 if (opts->raw_samples) {
121 attr->sample_type |= PERF_SAMPLE_TIME;
122 attr->sample_type |= PERF_SAMPLE_RAW;
123 attr->sample_type |= PERF_SAMPLE_CPU;
124 }
125
126 if (opts->no_delay) {
127 attr->watermark = 0;
128 attr->wakeup_events = 1;
129 }
130 if (opts->branch_stack) {
131 attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
132 attr->branch_sample_type = opts->branch_stack;
133 }
134
135 attr->mmap = track;
136 attr->comm = track;
137
138 if (!opts->target_pid && !opts->target_tid && !opts->system_wide &&
139 (!opts->group || evsel == first)) {
140 attr->disabled = 1;
141 attr->enable_on_exec = 1;
142 }
143 }
144
perf_evsel__alloc_fd(struct perf_evsel * evsel,int ncpus,int nthreads)145 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
146 {
147 int cpu, thread;
148 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
149
150 if (evsel->fd) {
151 for (cpu = 0; cpu < ncpus; cpu++) {
152 for (thread = 0; thread < nthreads; thread++) {
153 FD(evsel, cpu, thread) = -1;
154 }
155 }
156 }
157
158 return evsel->fd != NULL ? 0 : -ENOMEM;
159 }
160
perf_evsel__alloc_id(struct perf_evsel * evsel,int ncpus,int nthreads)161 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
162 {
163 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
164 if (evsel->sample_id == NULL)
165 return -ENOMEM;
166
167 evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
168 if (evsel->id == NULL) {
169 xyarray__delete(evsel->sample_id);
170 evsel->sample_id = NULL;
171 return -ENOMEM;
172 }
173
174 return 0;
175 }
176
perf_evsel__alloc_counts(struct perf_evsel * evsel,int ncpus)177 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
178 {
179 evsel->counts = zalloc((sizeof(*evsel->counts) +
180 (ncpus * sizeof(struct perf_counts_values))));
181 return evsel->counts != NULL ? 0 : -ENOMEM;
182 }
183
perf_evsel__free_fd(struct perf_evsel * evsel)184 void perf_evsel__free_fd(struct perf_evsel *evsel)
185 {
186 xyarray__delete(evsel->fd);
187 evsel->fd = NULL;
188 }
189
perf_evsel__free_id(struct perf_evsel * evsel)190 void perf_evsel__free_id(struct perf_evsel *evsel)
191 {
192 xyarray__delete(evsel->sample_id);
193 evsel->sample_id = NULL;
194 free(evsel->id);
195 evsel->id = NULL;
196 }
197
perf_evsel__close_fd(struct perf_evsel * evsel,int ncpus,int nthreads)198 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
199 {
200 int cpu, thread;
201
202 for (cpu = 0; cpu < ncpus; cpu++)
203 for (thread = 0; thread < nthreads; ++thread) {
204 close(FD(evsel, cpu, thread));
205 FD(evsel, cpu, thread) = -1;
206 }
207 }
208
perf_evsel__exit(struct perf_evsel * evsel)209 void perf_evsel__exit(struct perf_evsel *evsel)
210 {
211 assert(list_empty(&evsel->node));
212 xyarray__delete(evsel->fd);
213 xyarray__delete(evsel->sample_id);
214 free(evsel->id);
215 }
216
perf_evsel__delete(struct perf_evsel * evsel)217 void perf_evsel__delete(struct perf_evsel *evsel)
218 {
219 perf_evsel__exit(evsel);
220 close_cgroup(evsel->cgrp);
221 free(evsel->name);
222 free(evsel);
223 }
224
__perf_evsel__read_on_cpu(struct perf_evsel * evsel,int cpu,int thread,bool scale)225 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
226 int cpu, int thread, bool scale)
227 {
228 struct perf_counts_values count;
229 size_t nv = scale ? 3 : 1;
230
231 if (FD(evsel, cpu, thread) < 0)
232 return -EINVAL;
233
234 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
235 return -ENOMEM;
236
237 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
238 return -errno;
239
240 if (scale) {
241 if (count.run == 0)
242 count.val = 0;
243 else if (count.run < count.ena)
244 count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
245 } else
246 count.ena = count.run = 0;
247
248 evsel->counts->cpu[cpu] = count;
249 return 0;
250 }
251
__perf_evsel__read(struct perf_evsel * evsel,int ncpus,int nthreads,bool scale)252 int __perf_evsel__read(struct perf_evsel *evsel,
253 int ncpus, int nthreads, bool scale)
254 {
255 size_t nv = scale ? 3 : 1;
256 int cpu, thread;
257 struct perf_counts_values *aggr = &evsel->counts->aggr, count;
258
259 aggr->val = aggr->ena = aggr->run = 0;
260
261 for (cpu = 0; cpu < ncpus; cpu++) {
262 for (thread = 0; thread < nthreads; thread++) {
263 if (FD(evsel, cpu, thread) < 0)
264 continue;
265
266 if (readn(FD(evsel, cpu, thread),
267 &count, nv * sizeof(u64)) < 0)
268 return -errno;
269
270 aggr->val += count.val;
271 if (scale) {
272 aggr->ena += count.ena;
273 aggr->run += count.run;
274 }
275 }
276 }
277
278 evsel->counts->scaled = 0;
279 if (scale) {
280 if (aggr->run == 0) {
281 evsel->counts->scaled = -1;
282 aggr->val = 0;
283 return 0;
284 }
285
286 if (aggr->run < aggr->ena) {
287 evsel->counts->scaled = 1;
288 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
289 }
290 } else
291 aggr->ena = aggr->run = 0;
292
293 return 0;
294 }
295
__perf_evsel__open(struct perf_evsel * evsel,struct cpu_map * cpus,struct thread_map * threads,bool group,struct xyarray * group_fds)296 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
297 struct thread_map *threads, bool group,
298 struct xyarray *group_fds)
299 {
300 int cpu, thread;
301 unsigned long flags = 0;
302 int pid = -1, err;
303
304 if (evsel->fd == NULL &&
305 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
306 return -ENOMEM;
307
308 if (evsel->cgrp) {
309 flags = PERF_FLAG_PID_CGROUP;
310 pid = evsel->cgrp->fd;
311 }
312
313 for (cpu = 0; cpu < cpus->nr; cpu++) {
314 int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;
315
316 for (thread = 0; thread < threads->nr; thread++) {
317
318 if (!evsel->cgrp)
319 pid = threads->map[thread];
320
321 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
322 pid,
323 cpus->map[cpu],
324 group_fd, flags);
325 if (FD(evsel, cpu, thread) < 0) {
326 err = -errno;
327 goto out_close;
328 }
329
330 if (group && group_fd == -1)
331 group_fd = FD(evsel, cpu, thread);
332 }
333 }
334
335 return 0;
336
337 out_close:
338 do {
339 while (--thread >= 0) {
340 close(FD(evsel, cpu, thread));
341 FD(evsel, cpu, thread) = -1;
342 }
343 thread = threads->nr;
344 } while (--cpu >= 0);
345 return err;
346 }
347
perf_evsel__close(struct perf_evsel * evsel,int ncpus,int nthreads)348 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
349 {
350 if (evsel->fd == NULL)
351 return;
352
353 perf_evsel__close_fd(evsel, ncpus, nthreads);
354 perf_evsel__free_fd(evsel);
355 evsel->fd = NULL;
356 }
357
358 static struct {
359 struct cpu_map map;
360 int cpus[1];
361 } empty_cpu_map = {
362 .map.nr = 1,
363 .cpus = { -1, },
364 };
365
366 static struct {
367 struct thread_map map;
368 int threads[1];
369 } empty_thread_map = {
370 .map.nr = 1,
371 .threads = { -1, },
372 };
373
perf_evsel__open(struct perf_evsel * evsel,struct cpu_map * cpus,struct thread_map * threads,bool group,struct xyarray * group_fd)374 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
375 struct thread_map *threads, bool group,
376 struct xyarray *group_fd)
377 {
378 if (cpus == NULL) {
379 /* Work around old compiler warnings about strict aliasing */
380 cpus = &empty_cpu_map.map;
381 }
382
383 if (threads == NULL)
384 threads = &empty_thread_map.map;
385
386 return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
387 }
388
perf_evsel__open_per_cpu(struct perf_evsel * evsel,struct cpu_map * cpus,bool group,struct xyarray * group_fd)389 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
390 struct cpu_map *cpus, bool group,
391 struct xyarray *group_fd)
392 {
393 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
394 group_fd);
395 }
396
perf_evsel__open_per_thread(struct perf_evsel * evsel,struct thread_map * threads,bool group,struct xyarray * group_fd)397 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
398 struct thread_map *threads, bool group,
399 struct xyarray *group_fd)
400 {
401 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
402 group_fd);
403 }
404
/*
 * Extract the identification fields from the end of @event.
 *
 * The cursor starts at the last u64 of the event payload and walks
 * backwards, consuming one word per bit set in @type, in the order
 * CPU, STREAM_ID, ID, TIME, TID.  Fields whose bit is not set are left
 * untouched in @sample.
 *
 * Always returns 0.
 */
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *cursor = event->sample.array;

	/* jump to the last u64 word of the payload */
	cursor += ((event->header.size -
		    sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		/* only the first u32 of the word is used as the cpu */
		sample->cpu = *(const u32 *)cursor;
		cursor--;
	}

	if (type & PERF_SAMPLE_STREAM_ID)
		sample->stream_id = *cursor--;

	if (type & PERF_SAMPLE_ID)
		sample->id = *cursor--;

	if (type & PERF_SAMPLE_TIME)
		sample->time = *cursor--;

	if (type & PERF_SAMPLE_TID) {
		const u32 *pid_tid = (const u32 *)cursor;

		sample->pid = pid_tid[0];
		sample->tid = pid_tid[1];
	}

	return 0;
}
442
/*
 * Return true when the @size bytes starting at @offset extend past the end
 * of @event, where the end is the event address plus header.size.
 */
static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const char *event_end = (const char *)event + event->header.size;
	const char *field_end = (const char *)offset + size;

	return field_end > event_end;
}
453
/*
 * Decode @event into @data according to the sample_type bits in @type.
 *
 * @event:         event to decode
 * @type:          PERF_SAMPLE_* bits describing which fields are present
 * @sample_size:   size in bytes of the fixed part of a sample
 * @sample_id_all: whether non-sample events carry trailing id fields
 * @data:          output; reset to defaults (cpu/pid/tid/id/stream_id/time
 *                 all-ones, period 1, everything else 0) before parsing
 * @swapped:       event comes from a host of the other endianness
 *
 * For anything other than PERF_RECORD_SAMPLE only the trailing id fields
 * are parsed, and only when @sample_id_all is set.
 *
 * The variable-length parts (callchain, raw data, branch stack) are checked
 * against header.size before they are read, and the cursor is never advanced
 * past the end of the event.  callchain, raw_data and branch_stack in @data
 * point into @event; nothing is copied.
 *
 * Returns 0 on success, -EFAULT when a field does not fit inside the event,
 * or -1 for the unsupported PERF_SAMPLE_READ.
 */
int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = 1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;

	/* the fixed-size fields must fit inside the event */
	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	/* addr stays 0 and id stays -1ULL (set above) when not present */
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		u64 nr;

		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;
		nr = data->callchain->nr;

		/*
		 * An event cannot hold more u64 words than header.size
		 * allows; rejecting larger counts first also keeps the byte
		 * count below from wrapping.
		 */
		if (nr > event->header.size / sizeof(u64))
			return -EFAULT;

		/* the nr word plus nr entries of one u64 each must fit */
		if (sample_overlap(event, array, (1 + nr) * sizeof(u64)))
			return -EFAULT;

		array += 1 + nr;
	}

	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		/* make sure the size word is inside the event before reading */
		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;

		array = (void *)array + data->raw_size + sizeof(u32);
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		u64 sz;

		/* the nr word must be inside the event before it is read */
		if (sample_overlap(event, array, sizeof(u64)))
			return -EFAULT;

		data->branch_stack = (struct branch_stack *)array;
		array++; /* nr */

		/* bound nr first so the multiplication cannot wrap */
		if (data->branch_stack->nr >
		    event->header.size / sizeof(struct branch_entry))
			return -EFAULT;

		sz = data->branch_stack->nr * sizeof(struct branch_entry);
		if (sample_overlap(event, array, sz))
			return -EFAULT;

		sz /= sizeof(u64);
		array += sz;
	}
	return 0;
}
599
/*
 * Write the fields of @sample selected by @type into @event, in the same
 * order perf_event__parse_sample() reads them: IP, TID, TIME, ADDR, ID,
 * STREAM_ID, CPU, PERIOD.  Other sample_type bits are ignored.
 *
 * @event:   destination; the caller must provide enough room for every
 *           selected field (no bounds are checked here)
 * @type:    PERF_SAMPLE_* bits selecting the fields to emit
 * @sample:  source values
 * @swapped: emit the pid/tid and cpu words for a host of the other
 *           endianness (inverse of the swapping done when parsing)
 *
 * Always returns 0.
 */
int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  const struct perf_sample *sample,
				  bool swapped)
{
	u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IP) {
		event->ip.ip = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}

		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		/*
		 * Only the first u32 carries the cpu.  Zero the other half so
		 * the word written out never contains uninitialized stack
		 * data or the tid left over from the PERF_SAMPLE_TID step.
		 */
		u.val32[1] = 0;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	return 0;
}
678