1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/record.h"
31 #include "util/cpumap.h"
32 #include "util/thread_map.h"
33 #include "util/data.h"
34 #include "util/perf_regs.h"
35 #include "util/auxtrace.h"
36 #include "util/tsc.h"
37 #include "util/parse-branch-options.h"
38 #include "util/parse-regs-options.h"
39 #include "util/perf_api_probe.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/pmu.h"
50 #include "util/pmus.h"
51 #include "util/clockid.h"
52 #include "util/off_cpu.h"
53 #include "util/bpf-filter.h"
54 #include "asm/bug.h"
55 #include "perf.h"
56 #include "cputopo.h"
57
58 #include <errno.h>
59 #include <inttypes.h>
60 #include <locale.h>
61 #include <poll.h>
62 #include <pthread.h>
63 #include <unistd.h>
64 #ifndef HAVE_GETTID
65 #include <syscall.h>
66 #endif
67 #include <sched.h>
68 #include <signal.h>
69 #ifdef HAVE_EVENTFD_SUPPORT
70 #include <sys/eventfd.h>
71 #endif
72 #include <sys/mman.h>
73 #include <sys/wait.h>
74 #include <sys/types.h>
75 #include <sys/stat.h>
76 #include <fcntl.h>
77 #include <linux/err.h>
78 #include <linux/string.h>
79 #include <linux/time64.h>
80 #include <linux/zalloc.h>
81 #include <linux/bitmap.h>
82 #include <sys/time.h>
83
84 struct switch_output {
85 bool enabled;
86 bool signal;
87 unsigned long size;
88 unsigned long time;
89 const char *str;
90 bool set;
91 char **filenames;
92 int num_files;
93 int cur_file;
94 };
95
96 struct thread_mask {
97 struct mmap_cpu_mask maps;
98 struct mmap_cpu_mask affinity;
99 };
100
101 struct record_thread {
102 pid_t tid;
103 struct thread_mask *mask;
104 struct {
105 int msg[2];
106 int ack[2];
107 } pipes;
108 struct fdarray pollfd;
109 int ctlfd_pos;
110 int nr_mmaps;
111 struct mmap **maps;
112 struct mmap **overwrite_maps;
113 struct record *rec;
114 unsigned long long samples;
115 unsigned long waking;
116 u64 bytes_written;
117 u64 bytes_transferred;
118 u64 bytes_compressed;
119 };
120
121 static __thread struct record_thread *thread;
122
123 enum thread_msg {
124 THREAD_MSG__UNDEFINED = 0,
125 THREAD_MSG__READY,
126 THREAD_MSG__MAX,
127 };
128
129 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130 "UNDEFINED", "READY"
131 };
132
133 enum thread_spec {
134 THREAD_SPEC__UNDEFINED = 0,
135 THREAD_SPEC__CPU,
136 THREAD_SPEC__CORE,
137 THREAD_SPEC__PACKAGE,
138 THREAD_SPEC__NUMA,
139 THREAD_SPEC__USER,
140 THREAD_SPEC__MAX,
141 };
142
143 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144 "undefined", "cpu", "core", "package", "numa", "user"
145 };
146
147 struct pollfd_index_map {
148 int evlist_pollfd_index;
149 int thread_pollfd_index;
150 };
151
152 struct record {
153 struct perf_tool tool;
154 struct record_opts opts;
155 u64 bytes_written;
156 u64 thread_bytes_written;
157 struct perf_data data;
158 struct auxtrace_record *itr;
159 struct evlist *evlist;
160 struct perf_session *session;
161 struct evlist *sb_evlist;
162 pthread_t thread_id;
163 int realtime_prio;
164 bool switch_output_event_set;
165 bool no_buildid;
166 bool no_buildid_set;
167 bool no_buildid_cache;
168 bool no_buildid_cache_set;
169 bool buildid_all;
170 bool buildid_mmap;
171 bool timestamp_filename;
172 bool timestamp_boundary;
173 bool off_cpu;
174 struct switch_output switch_output;
175 unsigned long long samples;
176 unsigned long output_max_size; /* = 0: unlimited */
177 struct perf_debuginfod debuginfod;
178 int nr_threads;
179 struct thread_mask *thread_masks;
180 struct record_thread *thread_data;
181 struct pollfd_index_map *index_map;
182 size_t index_map_sz;
183 size_t index_map_cnt;
184 };
185
186 static volatile int done;
187
188 static volatile int auxtrace_record__snapshot_started;
189 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
190 static DEFINE_TRIGGER(switch_output_trigger);
191
192 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
193 "SYS", "NODE", "CPU"
194 };
195
196 #ifndef HAVE_GETTID
197 static inline pid_t gettid(void)
198 {
199 return (pid_t)syscall(__NR_gettid);
200 }
201 #endif
202
203 static int record__threads_enabled(struct record *rec)
204 {
205 return rec->opts.threads_spec;
206 }
207
208 static bool switch_output_signal(struct record *rec)
209 {
210 return rec->switch_output.signal &&
211 trigger_is_ready(&switch_output_trigger);
212 }
213
214 static bool switch_output_size(struct record *rec)
215 {
216 return rec->switch_output.size &&
217 trigger_is_ready(&switch_output_trigger) &&
218 (rec->bytes_written >= rec->switch_output.size);
219 }
220
221 static bool switch_output_time(struct record *rec)
222 {
223 return rec->switch_output.time &&
224 trigger_is_ready(&switch_output_trigger);
225 }
226
227 static u64 record__bytes_written(struct record *rec)
228 {
229 return rec->bytes_written + rec->thread_bytes_written;
230 }
231
232 static bool record__output_max_size_exceeded(struct record *rec)
233 {
234 return rec->output_max_size &&
235 (record__bytes_written(rec) >= rec->output_max_size);
236 }
237
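/*
 * Write a record either to the main perf.data file or, in threaded
 * (directory) mode, to the per-mmap file. Written bytes are accounted,
 * the session is stopped once the configured maximum output size is
 * exceeded, and the switch-output trigger is armed when the size
 * threshold is reached.
 */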
238 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
239 void *bf, size_t size)
240 {
241 struct perf_data_file *file = &rec->session->data->file;
242
243 if (map && map->file)
244 file = map->file;
245
246 if (perf_data_file__write(file, bf, size) < 0) {
247 pr_err("failed to write perf data, error: %m\n");
248 return -1;
249 }
250
251 if (map && map->file) {
252 thread->bytes_written += size;
253 rec->thread_bytes_written += size;
254 } else {
255 rec->bytes_written += size;
256 }
257
258 if (record__output_max_size_exceeded(rec) && !done) {
259 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
260 " stopping session ]\n",
261 record__bytes_written(rec) >> 10);
262 done = 1;
263 }
264
265 if (switch_output_size(rec))
266 trigger_hit(&switch_output_trigger);
267
268 return 0;
269 }
270
271 static int record__aio_enabled(struct record *rec);
272 static int record__comp_enabled(struct record *rec);
273 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
274 void *dst, size_t dst_size, void *src, size_t src_size);
275
276 #ifdef HAVE_AIO_SUPPORT
277 static int record__aio_write(struct aiocb *cblock, int trace_fd,
278 void *buf, size_t size, off_t off)
279 {
280 int rc;
281
282 cblock->aio_fildes = trace_fd;
283 cblock->aio_buf = buf;
284 cblock->aio_nbytes = size;
285 cblock->aio_offset = off;
286 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
287
288 do {
289 rc = aio_write(cblock);
290 if (rc == 0) {
291 break;
292 } else if (errno != EAGAIN) {
293 cblock->aio_fildes = -1;
294 pr_err("failed to queue perf data, error: %m\n");
295 break;
296 }
297 } while (1);
298
299 return rc;
300 }
301
302 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
303 {
304 void *rem_buf;
305 off_t rem_off;
306 size_t rem_size;
307 int rc, aio_errno;
308 ssize_t aio_ret, written;
309
310 aio_errno = aio_error(cblock);
311 if (aio_errno == EINPROGRESS)
312 return 0;
313
314 written = aio_ret = aio_return(cblock);
315 if (aio_ret < 0) {
316 if (aio_errno != EINTR)
317 pr_err("failed to write perf data, error: %m\n");
318 written = 0;
319 }
320
321 rem_size = cblock->aio_nbytes - written;
322
323 if (rem_size == 0) {
324 cblock->aio_fildes = -1;
325 /*
326 * md->refcount is incremented in record__aio_pushfn() for
327 * every aio write request started in record__aio_push() so
328 * decrement it because the request is now complete.
329 */
330 perf_mmap__put(&md->core);
331 rc = 1;
332 } else {
333 /*
334 * The aio write request may need to be restarted with the
335 * remainder if the kernel didn't write the whole
336 * chunk at once.
337 */
338 rem_off = cblock->aio_offset + written;
339 rem_buf = (void *)(cblock->aio_buf + written);
340 record__aio_write(cblock, cblock->aio_fildes,
341 rem_buf, rem_size, rem_off);
342 rc = 0;
343 }
344
345 return rc;
346 }
347
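/*
 * Reap completed aio write requests. With sync_all == false, return the
 * index of a free control block, waiting for one to complete if
 * necessary; with sync_all == true, wait until every in-flight request
 * has finished.
 */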
348 static int record__aio_sync(struct mmap *md, bool sync_all)
349 {
350 struct aiocb **aiocb = md->aio.aiocb;
351 struct aiocb *cblocks = md->aio.cblocks;
352 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
353 int i, do_suspend;
354
355 do {
356 do_suspend = 0;
357 for (i = 0; i < md->aio.nr_cblocks; ++i) {
358 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
359 if (sync_all)
360 aiocb[i] = NULL;
361 else
362 return i;
363 } else {
364 /*
365 * The started aio write is not complete yet,
366 * so it has to be waited on before the
367 * next allocation.
368 */
369 aiocb[i] = &cblocks[i];
370 do_suspend = 1;
371 }
372 }
373 if (!do_suspend)
374 return -1;
375
376 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
377 if (!(errno == EAGAIN || errno == EINTR))
378 pr_err("failed to sync perf data, error: %m\n");
379 }
380 } while (1);
381 }
382
383 struct record_aio {
384 struct record *rec;
385 void *data;
386 size_t size;
387 };
388
389 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
390 {
391 struct record_aio *aio = to;
392
393 /*
394 * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
395 * buffer to release space in the kernel buffer as fast as possible, via the
396 * perf_mmap__consume() call made from perf_mmap__push().
397 *
398 * That lets the kernel proceed with storing more profiling data into
399 * the kernel buffer earlier than the other per-cpu kernel buffers are handled.
400 *
401 * Copying may be done in two steps when the chunk of profiling data
402 * crosses the upper bound of the kernel buffer. In that case we first move
403 * the part of the data from map->start to the upper bound and then the
404 * remainder from the beginning of the kernel buffer to the end of the data chunk.
405 */
406
407 if (record__comp_enabled(aio->rec)) {
408 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
409 mmap__mmap_len(map) - aio->size,
410 buf, size);
411 } else {
412 memcpy(aio->data + aio->size, buf, size);
413 }
414
415 if (!aio->size) {
416 /*
417 * Increment map->refcount to guard the map->aio.data[] buffer
418 * from premature deallocation, because the map object can be
419 * released before the aio write request started on the
420 * map->aio.data[] buffer completes.
421 *
422 * perf_mmap__put() is done at record__aio_complete()
423 * after started aio request completion or at record__aio_push()
424 * if the request failed to start.
425 */
426 perf_mmap__get(&map->core);
427 }
428
429 aio->size += size;
430
431 return size;
432 }
433
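/*
 * Start an asynchronous write of the mmap contents: grab a free aio
 * buffer, copy (or compress) the kernel buffer into it via
 * record__aio_pushfn() and queue the write at offset *off, which is
 * advanced on success.
 */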
434 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
435 {
436 int ret, idx;
437 int trace_fd = rec->session->data->file.fd;
438 struct record_aio aio = { .rec = rec, .size = 0 };
439
440 /*
441 * Call record__aio_sync() to wait till map->aio.data[] buffer
442 * becomes available after previous aio write operation.
443 */
444
445 idx = record__aio_sync(map, false);
446 aio.data = map->aio.data[idx];
447 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
448 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
449 return ret;
450
451 rec->samples++;
452 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
453 if (!ret) {
454 *off += aio.size;
455 rec->bytes_written += aio.size;
456 if (switch_output_size(rec))
457 trigger_hit(&switch_output_trigger);
458 } else {
459 /*
460 * Decrement the map->refcount incremented in record__aio_pushfn()
461 * if the record__aio_write() operation failed to start; otherwise
462 * map->refcount is decremented in record__aio_complete() after
463 * aio write operation finishes successfully.
464 */
465 perf_mmap__put(&map->core);
466 }
467
468 return ret;
469 }
470
471 static off_t record__aio_get_pos(int trace_fd)
472 {
473 return lseek(trace_fd, 0, SEEK_CUR);
474 }
475
476 static void record__aio_set_pos(int trace_fd, off_t pos)
477 {
478 lseek(trace_fd, pos, SEEK_SET);
479 }
480
481 static void record__aio_mmap_read_sync(struct record *rec)
482 {
483 int i;
484 struct evlist *evlist = rec->evlist;
485 struct mmap *maps = evlist->mmap;
486
487 if (!record__aio_enabled(rec))
488 return;
489
490 for (i = 0; i < evlist->core.nr_mmaps; i++) {
491 struct mmap *map = &maps[i];
492
493 if (map->core.base)
494 record__aio_sync(map, true);
495 }
496 }
497
498 static int nr_cblocks_default = 1;
499 static int nr_cblocks_max = 4;
500
501 static int record__aio_parse(const struct option *opt,
502 const char *str,
503 int unset)
504 {
505 struct record_opts *opts = (struct record_opts *)opt->value;
506
507 if (unset) {
508 opts->nr_cblocks = 0;
509 } else {
510 if (str)
511 opts->nr_cblocks = strtol(str, NULL, 0);
512 if (!opts->nr_cblocks)
513 opts->nr_cblocks = nr_cblocks_default;
514 }
515
516 return 0;
517 }
518 #else /* HAVE_AIO_SUPPORT */
519 static int nr_cblocks_max = 0;
520
521 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
522 off_t *off __maybe_unused)
523 {
524 return -1;
525 }
526
527 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
528 {
529 return -1;
530 }
531
532 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
533 {
534 }
535
536 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
537 {
538 }
539 #endif
540
541 static int record__aio_enabled(struct record *rec)
542 {
543 return rec->opts.nr_cblocks > 0;
544 }
545
546 #define MMAP_FLUSH_DEFAULT 1
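/*
 * Parse the --mmap-flush option: accept a plain number or a value with a
 * B/K/M/G suffix, fall back to MMAP_FLUSH_DEFAULT and cap the result at
 * a quarter of the mmap buffer size.
 */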
547 static int record__mmap_flush_parse(const struct option *opt,
548 const char *str,
549 int unset)
550 {
551 int flush_max;
552 struct record_opts *opts = (struct record_opts *)opt->value;
553 static struct parse_tag tags[] = {
554 { .tag = 'B', .mult = 1 },
555 { .tag = 'K', .mult = 1 << 10 },
556 { .tag = 'M', .mult = 1 << 20 },
557 { .tag = 'G', .mult = 1 << 30 },
558 { .tag = 0 },
559 };
560
561 if (unset)
562 return 0;
563
564 if (str) {
565 opts->mmap_flush = parse_tag_value(str, tags);
566 if (opts->mmap_flush == (int)-1)
567 opts->mmap_flush = strtol(str, NULL, 0);
568 }
569
570 if (!opts->mmap_flush)
571 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
572
573 flush_max = evlist__mmap_size(opts->mmap_pages);
574 flush_max /= 4;
575 if (opts->mmap_flush > flush_max)
576 opts->mmap_flush = flush_max;
577
578 return 0;
579 }
580
581 #ifdef HAVE_ZSTD_SUPPORT
582 static unsigned int comp_level_default = 1;
583
584 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
585 {
586 struct record_opts *opts = opt->value;
587
588 if (unset) {
589 opts->comp_level = 0;
590 } else {
591 if (str)
592 opts->comp_level = strtol(str, NULL, 0);
593 if (!opts->comp_level)
594 opts->comp_level = comp_level_default;
595 }
596
597 return 0;
598 }
599 #endif
600 static unsigned int comp_level_max = 22;
601
602 static int record__comp_enabled(struct record *rec)
603 {
604 return rec->opts.comp_level > 0;
605 }
606
607 static int process_synthesized_event(struct perf_tool *tool,
608 union perf_event *event,
609 struct perf_sample *sample __maybe_unused,
610 struct machine *machine __maybe_unused)
611 {
612 struct record *rec = container_of(tool, struct record, tool);
613 return record__write(rec, NULL, event, event->header.size);
614 }
615
616 static struct mutex synth_lock;
617
618 static int process_locked_synthesized_event(struct perf_tool *tool,
619 union perf_event *event,
620 struct perf_sample *sample __maybe_unused,
621 struct machine *machine __maybe_unused)
622 {
623 int ret;
624
625 mutex_lock(&synth_lock);
626 ret = process_synthesized_event(tool, event, sample, machine);
627 mutex_unlock(&synth_lock);
628 return ret;
629 }
630
631 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
632 {
633 struct record *rec = to;
634
635 if (record__comp_enabled(rec)) {
636 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
637 bf = map->data;
638 }
639
640 thread->samples++;
641 return record__write(rec, map, bf, size);
642 }
643
644 static volatile sig_atomic_t signr = -1;
645 static volatile sig_atomic_t child_finished;
646 #ifdef HAVE_EVENTFD_SUPPORT
647 static volatile sig_atomic_t done_fd = -1;
648 #endif
649
650 static void sig_handler(int sig)
651 {
652 if (sig == SIGCHLD)
653 child_finished = 1;
654 else
655 signr = sig;
656
657 done = 1;
658 #ifdef HAVE_EVENTFD_SUPPORT
659 if (done_fd >= 0) {
660 u64 tmp = 1;
661 int orig_errno = errno;
662
663 /*
664 * It is possible for this signal handler to run after done is
665 * checked in the main loop, but before the perf counter fds are
666 * polled. If this happens, the poll() will continue to wait
667 * even though done is set, and will only break out if either
668 * another signal is received, or the counters are ready for
669 * read. To ensure the poll() doesn't sleep when done is set,
670 * use an eventfd (done_fd) to wake up the poll().
671 */
672 if (write(done_fd, &tmp, sizeof(tmp)) < 0)
673 pr_err("failed to signal wakeup fd, error: %m\n");
674
675 errno = orig_errno;
676 }
677 #endif // HAVE_EVENTFD_SUPPORT
678 }
679
680 static void sigsegv_handler(int sig)
681 {
682 perf_hooks__recover();
683 sighandler_dump_stack(sig);
684 }
685
686 static void record__sig_exit(void)
687 {
688 if (signr == -1)
689 return;
690
691 signal(signr, SIG_DFL);
692 raise(signr);
693 }
694
695 #ifdef HAVE_AUXTRACE_SUPPORT
696
697 static int record__process_auxtrace(struct perf_tool *tool,
698 struct mmap *map,
699 union perf_event *event, void *data1,
700 size_t len1, void *data2, size_t len2)
701 {
702 struct record *rec = container_of(tool, struct record, tool);
703 struct perf_data *data = &rec->data;
704 size_t padding;
705 u8 pad[8] = {0};
706
707 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
708 off_t file_offset;
709 int fd = perf_data__fd(data);
710 int err;
711
712 file_offset = lseek(fd, 0, SEEK_CUR);
713 if (file_offset == -1)
714 return -1;
715 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
716 event, file_offset);
717 if (err)
718 return err;
719 }
720
721 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
722 padding = (len1 + len2) & 7;
723 if (padding)
724 padding = 8 - padding;
725
726 record__write(rec, map, event, event->header.size);
727 record__write(rec, map, data1, len1);
728 if (len2)
729 record__write(rec, map, data2, len2);
730 record__write(rec, map, &pad, padding);
731
732 return 0;
733 }
734
735 static int record__auxtrace_mmap_read(struct record *rec,
736 struct mmap *map)
737 {
738 int ret;
739
740 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
741 record__process_auxtrace);
742 if (ret < 0)
743 return ret;
744
745 if (ret)
746 rec->samples++;
747
748 return 0;
749 }
750
751 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
752 struct mmap *map)
753 {
754 int ret;
755
756 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
757 record__process_auxtrace,
758 rec->opts.auxtrace_snapshot_size);
759 if (ret < 0)
760 return ret;
761
762 if (ret)
763 rec->samples++;
764
765 return 0;
766 }
767
768 static int record__auxtrace_read_snapshot_all(struct record *rec)
769 {
770 int i;
771 int rc = 0;
772
773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
774 struct mmap *map = &rec->evlist->mmap[i];
775
776 if (!map->auxtrace_mmap.base)
777 continue;
778
779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
780 rc = -1;
781 goto out;
782 }
783 }
784 out:
785 return rc;
786 }
787
788 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
789 {
790 pr_debug("Recording AUX area tracing snapshot\n");
791 if (record__auxtrace_read_snapshot_all(rec) < 0) {
792 trigger_error(&auxtrace_snapshot_trigger);
793 } else {
794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
795 trigger_error(&auxtrace_snapshot_trigger);
796 else
797 trigger_ready(&auxtrace_snapshot_trigger);
798 }
799 }
800
801 static int record__auxtrace_snapshot_exit(struct record *rec)
802 {
803 if (trigger_is_error(&auxtrace_snapshot_trigger))
804 return 0;
805
806 if (!auxtrace_record__snapshot_started &&
807 auxtrace_record__snapshot_start(rec->itr))
808 return -1;
809
810 record__read_auxtrace_snapshot(rec, true);
811 if (trigger_is_error(&auxtrace_snapshot_trigger))
812 return -1;
813
814 return 0;
815 }
816
817 static int record__auxtrace_init(struct record *rec)
818 {
819 int err;
820
821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
822 && record__threads_enabled(rec)) {
823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
824 return -EINVAL;
825 }
826
827 if (!rec->itr) {
828 rec->itr = auxtrace_record__init(rec->evlist, &err);
829 if (err)
830 return err;
831 }
832
833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
834 rec->opts.auxtrace_snapshot_opts);
835 if (err)
836 return err;
837
838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
839 rec->opts.auxtrace_sample_opts);
840 if (err)
841 return err;
842
843 auxtrace_regroup_aux_output(rec->evlist);
844
845 return auxtrace_parse_filters(rec->evlist);
846 }
847
848 #else
849
850 static inline
851 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
852 struct mmap *map __maybe_unused)
853 {
854 return 0;
855 }
856
857 static inline
858 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
859 bool on_exit __maybe_unused)
860 {
861 }
862
863 static inline
864 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
865 {
866 return 0;
867 }
868
869 static inline
870 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
871 {
872 return 0;
873 }
874
875 static int record__auxtrace_init(struct record *rec __maybe_unused)
876 {
877 return 0;
878 }
879
880 #endif
881
882 static int record__config_text_poke(struct evlist *evlist)
883 {
884 struct evsel *evsel;
885
886 /* Nothing to do if text poke is already configured */
887 evlist__for_each_entry(evlist, evsel) {
888 if (evsel->core.attr.text_poke)
889 return 0;
890 }
891
892 evsel = evlist__add_dummy_on_all_cpus(evlist);
893 if (!evsel)
894 return -ENOMEM;
895
896 evsel->core.attr.text_poke = 1;
897 evsel->core.attr.ksymbol = 1;
898 evsel->immediate = true;
899 evsel__set_sample_bit(evsel, TIME);
900
901 return 0;
902 }
903
904 static int record__config_off_cpu(struct record *rec)
905 {
906 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
907 }
908
909 static bool record__kcore_readable(struct machine *machine)
910 {
911 char kcore[PATH_MAX];
912 int fd;
913
914 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
915
916 fd = open(kcore, O_RDONLY);
917 if (fd < 0)
918 return false;
919
920 close(fd);
921
922 return true;
923 }
924
925 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
926 {
927 char from_dir[PATH_MAX];
928 char kcore_dir[PATH_MAX];
929 int ret;
930
931 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
932
933 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
934 if (ret)
935 return ret;
936
937 return kcore_copy(from_dir, kcore_dir);
938 }
939
940 static void record__thread_data_init_pipes(struct record_thread *thread_data)
941 {
942 thread_data->pipes.msg[0] = -1;
943 thread_data->pipes.msg[1] = -1;
944 thread_data->pipes.ack[0] = -1;
945 thread_data->pipes.ack[1] = -1;
946 }
947
948 static int record__thread_data_open_pipes(struct record_thread *thread_data)
949 {
950 if (pipe(thread_data->pipes.msg))
951 return -EINVAL;
952
953 if (pipe(thread_data->pipes.ack)) {
954 close(thread_data->pipes.msg[0]);
955 thread_data->pipes.msg[0] = -1;
956 close(thread_data->pipes.msg[1]);
957 thread_data->pipes.msg[1] = -1;
958 return -EINVAL;
959 }
960
961 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
962 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
963 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
964
965 return 0;
966 }
967
968 static void record__thread_data_close_pipes(struct record_thread *thread_data)
969 {
970 if (thread_data->pipes.msg[0] != -1) {
971 close(thread_data->pipes.msg[0]);
972 thread_data->pipes.msg[0] = -1;
973 }
974 if (thread_data->pipes.msg[1] != -1) {
975 close(thread_data->pipes.msg[1]);
976 thread_data->pipes.msg[1] = -1;
977 }
978 if (thread_data->pipes.ack[0] != -1) {
979 close(thread_data->pipes.ack[0]);
980 thread_data->pipes.ack[0] = -1;
981 }
982 if (thread_data->pipes.ack[1] != -1) {
983 close(thread_data->pipes.ack[1]);
984 thread_data->pipes.ack[1] = -1;
985 }
986 }
987
988 static bool evlist__per_thread(struct evlist *evlist)
989 {
990 return cpu_map__is_dummy(evlist->core.user_requested_cpus);
991 }
992
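/*
 * Distribute the evlist mmaps among the recording threads: in per-thread
 * mode this thread gets all of them, otherwise only the mmaps whose CPU
 * is set in the thread's maps mask.
 */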
993 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
994 {
995 int m, tm, nr_mmaps = evlist->core.nr_mmaps;
996 struct mmap *mmap = evlist->mmap;
997 struct mmap *overwrite_mmap = evlist->overwrite_mmap;
998 struct perf_cpu_map *cpus = evlist->core.all_cpus;
999 bool per_thread = evlist__per_thread(evlist);
1000
1001 if (per_thread)
1002 thread_data->nr_mmaps = nr_mmaps;
1003 else
1004 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1005 thread_data->mask->maps.nbits);
1006 if (mmap) {
1007 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1008 if (!thread_data->maps)
1009 return -ENOMEM;
1010 }
1011 if (overwrite_mmap) {
1012 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1013 if (!thread_data->overwrite_maps) {
1014 zfree(&thread_data->maps);
1015 return -ENOMEM;
1016 }
1017 }
1018 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1019 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1020
1021 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1022 if (per_thread ||
1023 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1024 if (thread_data->maps) {
1025 thread_data->maps[tm] = &mmap[m];
1026 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1027 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1028 }
1029 if (thread_data->overwrite_maps) {
1030 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1031 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1032 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1033 }
1034 tm++;
1035 }
1036 }
1037
1038 return 0;
1039 }
1040
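/*
 * Build the thread's private pollfd array by duplicating, from the
 * evlist pollfd, only the entries that belong to the mmaps this thread
 * is responsible for.
 */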
1041 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1042 {
1043 int f, tm, pos;
1044 struct mmap *map, *overwrite_map;
1045
1046 fdarray__init(&thread_data->pollfd, 64);
1047
1048 for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1049 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1050 overwrite_map = thread_data->overwrite_maps ?
1051 thread_data->overwrite_maps[tm] : NULL;
1052
1053 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1054 void *ptr = evlist->core.pollfd.priv[f].ptr;
1055
1056 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1057 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1058 &evlist->core.pollfd);
1059 if (pos < 0)
1060 return pos;
1061 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1062 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1063 }
1064 }
1065 }
1066
1067 return 0;
1068 }
1069
1070 static void record__free_thread_data(struct record *rec)
1071 {
1072 int t;
1073 struct record_thread *thread_data = rec->thread_data;
1074
1075 if (thread_data == NULL)
1076 return;
1077
1078 for (t = 0; t < rec->nr_threads; t++) {
1079 record__thread_data_close_pipes(&thread_data[t]);
1080 zfree(&thread_data[t].maps);
1081 zfree(&thread_data[t].overwrite_maps);
1082 fdarray__exit(&thread_data[t].pollfd);
1083 }
1084
1085 zfree(&rec->thread_data);
1086 }
1087
1088 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1089 int evlist_pollfd_index,
1090 int thread_pollfd_index)
1091 {
1092 size_t x = rec->index_map_cnt;
1093
1094 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1095 return -ENOMEM;
1096 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1097 rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1098 rec->index_map_cnt += 1;
1099 return 0;
1100 }
1101
1102 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1103 struct evlist *evlist,
1104 struct record_thread *thread_data)
1105 {
1106 struct pollfd *e_entries = evlist->core.pollfd.entries;
1107 struct pollfd *t_entries = thread_data->pollfd.entries;
1108 int err = 0;
1109 size_t i;
1110
1111 for (i = 0; i < rec->index_map_cnt; i++) {
1112 int e_pos = rec->index_map[i].evlist_pollfd_index;
1113 int t_pos = rec->index_map[i].thread_pollfd_index;
1114
1115 if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1116 e_entries[e_pos].events != t_entries[t_pos].events) {
1117 pr_err("Thread and evlist pollfd index mismatch\n");
1118 err = -EINVAL;
1119 continue;
1120 }
1121 e_entries[e_pos].revents = t_entries[t_pos].revents;
1122 }
1123 return err;
1124 }
1125
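/*
 * Duplicate the descriptors flagged as non-perf events from the evlist
 * pollfd into the main thread's pollfd and remember the index mapping so
 * that revents can be propagated back later.
 */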
1126 static int record__dup_non_perf_events(struct record *rec,
1127 struct evlist *evlist,
1128 struct record_thread *thread_data)
1129 {
1130 struct fdarray *fda = &evlist->core.pollfd;
1131 int i, ret;
1132
1133 for (i = 0; i < fda->nr; i++) {
1134 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1135 continue;
1136 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1137 if (ret < 0) {
1138 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1139 return ret;
1140 }
1141 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1142 thread_data, ret, fda->entries[i].fd);
1143 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1144 if (ret < 0) {
1145 pr_err("Failed to map thread and evlist pollfd indexes\n");
1146 return ret;
1147 }
1148 }
1149 return 0;
1150 }
1151
1152 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1153 {
1154 int t, ret;
1155 struct record_thread *thread_data;
1156
1157 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1158 if (!rec->thread_data) {
1159 pr_err("Failed to allocate thread data\n");
1160 return -ENOMEM;
1161 }
1162 thread_data = rec->thread_data;
1163
1164 for (t = 0; t < rec->nr_threads; t++)
1165 record__thread_data_init_pipes(&thread_data[t]);
1166
1167 for (t = 0; t < rec->nr_threads; t++) {
1168 thread_data[t].rec = rec;
1169 thread_data[t].mask = &rec->thread_masks[t];
1170 ret = record__thread_data_init_maps(&thread_data[t], evlist);
1171 if (ret) {
1172 pr_err("Failed to initialize thread[%d] maps\n", t);
1173 goto out_free;
1174 }
1175 ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1176 if (ret) {
1177 pr_err("Failed to initialize thread[%d] pollfd\n", t);
1178 goto out_free;
1179 }
1180 if (t) {
1181 thread_data[t].tid = -1;
1182 ret = record__thread_data_open_pipes(&thread_data[t]);
1183 if (ret) {
1184 pr_err("Failed to open thread[%d] communication pipes\n", t);
1185 goto out_free;
1186 }
1187 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1188 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1189 if (ret < 0) {
1190 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1191 goto out_free;
1192 }
1193 thread_data[t].ctlfd_pos = ret;
1194 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1195 thread_data, thread_data[t].ctlfd_pos,
1196 thread_data[t].pipes.msg[0]);
1197 } else {
1198 thread_data[t].tid = gettid();
1199
1200 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1201 if (ret < 0)
1202 goto out_free;
1203
1204 thread_data[t].ctlfd_pos = -1; /* Not used */
1205 }
1206 }
1207
1208 return 0;
1209
1210 out_free:
1211 record__free_thread_data(rec);
1212
1213 return ret;
1214 }
1215
1216 static int record__mmap_evlist(struct record *rec,
1217 struct evlist *evlist)
1218 {
1219 int i, ret;
1220 struct record_opts *opts = &rec->opts;
1221 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1222 opts->auxtrace_sample_mode;
1223 char msg[512];
1224
1225 if (opts->affinity != PERF_AFFINITY_SYS)
1226 cpu__setup_cpunode_map();
1227
1228 if (evlist__mmap_ex(evlist, opts->mmap_pages,
1229 opts->auxtrace_mmap_pages,
1230 auxtrace_overwrite,
1231 opts->nr_cblocks, opts->affinity,
1232 opts->mmap_flush, opts->comp_level) < 0) {
1233 if (errno == EPERM) {
1234 pr_err("Permission error mapping pages.\n"
1235 "Consider increasing "
1236 "/proc/sys/kernel/perf_event_mlock_kb,\n"
1237 "or try again with a smaller value of -m/--mmap_pages.\n"
1238 "(current value: %u,%u)\n",
1239 opts->mmap_pages, opts->auxtrace_mmap_pages);
1240 return -errno;
1241 } else {
1242 pr_err("failed to mmap with %d (%s)\n", errno,
1243 str_error_r(errno, msg, sizeof(msg)));
1244 if (errno)
1245 return -errno;
1246 else
1247 return -EINVAL;
1248 }
1249 }
1250
1251 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1252 return -1;
1253
1254 ret = record__alloc_thread_data(rec, evlist);
1255 if (ret)
1256 return ret;
1257
1258 if (record__threads_enabled(rec)) {
1259 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1260 if (ret) {
1261 pr_err("Failed to create data directory: %s\n", strerror(-ret));
1262 return ret;
1263 }
1264 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1265 if (evlist->mmap)
1266 evlist->mmap[i].file = &rec->data.dir.files[i];
1267 if (evlist->overwrite_mmap)
1268 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1269 }
1270 }
1271
1272 return 0;
1273 }
1274
1275 static int record__mmap(struct record *rec)
1276 {
1277 return record__mmap_evlist(rec, rec->evlist);
1278 }
1279
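/*
 * Open all events in the evlist (adding a tracking dummy event when
 * needed), falling back or breaking weak groups on failure, then apply
 * filters and mmap the ring buffers.
 */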
1280 static int record__open(struct record *rec)
1281 {
1282 char msg[BUFSIZ];
1283 struct evsel *pos;
1284 struct evlist *evlist = rec->evlist;
1285 struct perf_session *session = rec->session;
1286 struct record_opts *opts = &rec->opts;
1287 int rc = 0;
1288
1289 /*
1290 * For initial_delay, system wide or a hybrid system, we need to add a
1291 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
1292 * of waiting or event synthesis.
1293 */
1294 if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
1295 perf_pmus__num_core_pmus() > 1) {
1296 pos = evlist__get_tracking_event(evlist);
1297 if (!evsel__is_dummy_event(pos)) {
1298 /* Set up dummy event. */
1299 if (evlist__add_dummy(evlist))
1300 return -ENOMEM;
1301 pos = evlist__last(evlist);
1302 evlist__set_tracking_event(evlist, pos);
1303 }
1304
1305 /*
1306 * Enable the dummy event when the process is forked for
1307 * initial_delay, immediately for system wide.
1308 */
1309 if (opts->target.initial_delay && !pos->immediate &&
1310 !target__has_cpu(&opts->target))
1311 pos->core.attr.enable_on_exec = 1;
1312 else
1313 pos->immediate = 1;
1314 }
1315
1316 evlist__config(evlist, opts, &callchain_param);
1317
1318 evlist__for_each_entry(evlist, pos) {
1319 try_again:
1320 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1321 if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1322 if (verbose > 0)
1323 ui__warning("%s\n", msg);
1324 goto try_again;
1325 }
1326 if ((errno == EINVAL || errno == EBADF) &&
1327 pos->core.leader != &pos->core &&
1328 pos->weak_group) {
1329 pos = evlist__reset_weak_group(evlist, pos, true);
1330 goto try_again;
1331 }
1332 rc = -errno;
1333 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1334 ui__error("%s\n", msg);
1335 goto out;
1336 }
1337
1338 pos->supported = true;
1339 }
1340
1341 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1342 pr_warning(
1343 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1344 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1345 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1346 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1347 "Samples in kernel modules won't be resolved at all.\n\n"
1348 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1349 "even with a suitable vmlinux or kallsyms file.\n\n");
1350 }
1351
1352 if (evlist__apply_filters(evlist, &pos)) {
1353 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1354 pos->filter ?: "BPF", evsel__name(pos), errno,
1355 str_error_r(errno, msg, sizeof(msg)));
1356 rc = -1;
1357 goto out;
1358 }
1359
1360 rc = record__mmap(rec);
1361 if (rc)
1362 goto out;
1363
1364 session->evlist = evlist;
1365 perf_session__set_id_hdr_size(session);
1366 out:
1367 return rc;
1368 }
1369
1370 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1371 {
1372 if (rec->evlist->first_sample_time == 0)
1373 rec->evlist->first_sample_time = sample_time;
1374
1375 if (sample_time)
1376 rec->evlist->last_sample_time = sample_time;
1377 }
1378
1379 static int process_sample_event(struct perf_tool *tool,
1380 union perf_event *event,
1381 struct perf_sample *sample,
1382 struct evsel *evsel,
1383 struct machine *machine)
1384 {
1385 struct record *rec = container_of(tool, struct record, tool);
1386
1387 set_timestamp_boundary(rec, sample->time);
1388
1389 if (rec->buildid_all)
1390 return 0;
1391
1392 rec->samples++;
1393 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1394 }
1395
1396 static int process_buildids(struct record *rec)
1397 {
1398 struct perf_session *session = rec->session;
1399
1400 if (perf_data__size(&rec->data) == 0)
1401 return 0;
1402
1403 /*
1404 * During this process, it'll load the kernel map and replace the
1405 * dso->long_name with a real pathname it found. In this case
1406 * we prefer the vmlinux path like
1407 * /lib/modules/3.16.4/build/vmlinux
1408 *
1409 * rather than build-id path (in debug directory).
1410 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1411 */
1412 symbol_conf.ignore_vmlinux_buildid = true;
1413
1414 /*
1415 * If --buildid-all is given, it marks all DSOs regardless of hits,
1416 * so no need to process samples. But if timestamp_boundary is enabled,
1417 * it still needs to walk on all samples to get the timestamps of
1418 * first/last samples.
1419 */
1420 if (rec->buildid_all && !rec->timestamp_boundary)
1421 rec->tool.sample = NULL;
1422
1423 return perf_session__process_events(session);
1424 }
1425
1426 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1427 {
1428 int err;
1429 struct perf_tool *tool = data;
1430 /*
1431 * For the guest kernel, when processing the record & report subcommands,
1432 * we arrange the module mmaps prior to the guest kernel mmap and trigger
1433 * a dso preload, because by default guest module symbols are loaded
1434 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
1435 * method avoids missing symbols when the first address is
1436 * in a module instead of in the guest kernel.
1437 */
1438 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1439 machine);
1440 if (err < 0)
1441 pr_err("Couldn't record guest kernel [%d]'s reference"
1442 " relocation symbol.\n", machine->pid);
1443
1444 /*
1445 * We use _stext for the guest kernel because the guest kernel's
1446 * /proc/kallsyms sometimes has no _text.
1447 */
1448 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1449 machine);
1450 if (err < 0)
1451 pr_err("Couldn't record guest kernel [%d]'s reference"
1452 " relocation symbol.\n", machine->pid);
1453 }
1454
1455 static struct perf_event_header finished_round_event = {
1456 .size = sizeof(struct perf_event_header),
1457 .type = PERF_RECORD_FINISHED_ROUND,
1458 };
1459
1460 static struct perf_event_header finished_init_event = {
1461 .size = sizeof(struct perf_event_header),
1462 .type = PERF_RECORD_FINISHED_INIT,
1463 };
1464
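/*
 * When --affinity is not 'sys', migrate the reading thread to the CPUs
 * backing the mmap it is about to drain so the buffer is accessed
 * locally.
 */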
1465 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1466 {
1467 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1468 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1469 thread->mask->affinity.nbits)) {
1470 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1471 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1472 map->affinity_mask.bits, thread->mask->affinity.nbits);
1473 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1474 (cpu_set_t *)thread->mask->affinity.bits);
1475 if (verbose == 2) {
1476 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1477 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1478 }
1479 }
1480 }
1481
1482 static size_t process_comp_header(void *record, size_t increment)
1483 {
1484 struct perf_record_compressed *event = record;
1485 size_t size = sizeof(*event);
1486
1487 if (increment) {
1488 event->header.size += increment;
1489 return increment;
1490 }
1491
1492 event->header.type = PERF_RECORD_COMPRESSED;
1493 event->header.size = size;
1494
1495 return size;
1496 }
1497
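/*
 * Compress a chunk of trace data into PERF_RECORD_COMPRESSED records,
 * using the per-mmap zstd stream in threaded mode and the session-wide
 * one otherwise, and account transferred vs. compressed byte counts.
 */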
1498 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1499 void *dst, size_t dst_size, void *src, size_t src_size)
1500 {
1501 size_t compressed;
1502 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1503 struct zstd_data *zstd_data = &session->zstd_data;
1504
1505 if (map && map->file)
1506 zstd_data = &map->zstd_data;
1507
1508 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1509 max_record_size, process_comp_header);
1510
1511 if (map && map->file) {
1512 thread->bytes_transferred += src_size;
1513 thread->bytes_compressed += compressed;
1514 } else {
1515 session->bytes_transferred += src_size;
1516 session->bytes_compressed += compressed;
1517 }
1518
1519 return compressed;
1520 }
1521
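/*
 * Drain every mmap owned by the current thread, either synchronously or
 * via aio, read any AUX area data, and, when not in threaded (directory)
 * mode, terminate the batch with a PERF_RECORD_FINISHED_ROUND event.
 */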
1522 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1523 bool overwrite, bool synch)
1524 {
1525 u64 bytes_written = rec->bytes_written;
1526 int i;
1527 int rc = 0;
1528 int nr_mmaps;
1529 struct mmap **maps;
1530 int trace_fd = rec->data.file.fd;
1531 off_t off = 0;
1532
1533 if (!evlist)
1534 return 0;
1535
1536 nr_mmaps = thread->nr_mmaps;
1537 maps = overwrite ? thread->overwrite_maps : thread->maps;
1538
1539 if (!maps)
1540 return 0;
1541
1542 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1543 return 0;
1544
1545 if (record__aio_enabled(rec))
1546 off = record__aio_get_pos(trace_fd);
1547
1548 for (i = 0; i < nr_mmaps; i++) {
1549 u64 flush = 0;
1550 struct mmap *map = maps[i];
1551
1552 if (map->core.base) {
1553 record__adjust_affinity(rec, map);
1554 if (synch) {
1555 flush = map->core.flush;
1556 map->core.flush = 1;
1557 }
1558 if (!record__aio_enabled(rec)) {
1559 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1560 if (synch)
1561 map->core.flush = flush;
1562 rc = -1;
1563 goto out;
1564 }
1565 } else {
1566 if (record__aio_push(rec, map, &off) < 0) {
1567 record__aio_set_pos(trace_fd, off);
1568 if (synch)
1569 map->core.flush = flush;
1570 rc = -1;
1571 goto out;
1572 }
1573 }
1574 if (synch)
1575 map->core.flush = flush;
1576 }
1577
1578 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1579 !rec->opts.auxtrace_sample_mode &&
1580 record__auxtrace_mmap_read(rec, map) != 0) {
1581 rc = -1;
1582 goto out;
1583 }
1584 }
1585
1586 if (record__aio_enabled(rec))
1587 record__aio_set_pos(trace_fd, off);
1588
1589 /*
1590 * Mark the round finished in case we wrote
1591 * at least one event.
1592 *
1593 * No need for round events in directory mode,
1594 * because per-cpu maps and files have data
1595 * sorted by kernel.
1596 */
1597 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1598 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1599
1600 if (overwrite)
1601 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1602 out:
1603 return rc;
1604 }
1605
1606 static int record__mmap_read_all(struct record *rec, bool synch)
1607 {
1608 int err;
1609
1610 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1611 if (err)
1612 return err;
1613
1614 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1615 }
1616
1617 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1618 void *arg __maybe_unused)
1619 {
1620 struct perf_mmap *map = fda->priv[fd].ptr;
1621
1622 if (map)
1623 perf_mmap__put(map);
1624 }
1625
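/*
 * Body of a parallel recording thread: acknowledge start-up over the ack
 * pipe, then loop draining the thread's mmaps and polling its fds until
 * the message pipe is closed (POLLHUP), finish with a final synchronous
 * flush and acknowledge termination.
 */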
1626 static void *record__thread(void *arg)
1627 {
1628 enum thread_msg msg = THREAD_MSG__READY;
1629 bool terminate = false;
1630 struct fdarray *pollfd;
1631 int err, ctlfd_pos;
1632
1633 thread = arg;
1634 thread->tid = gettid();
1635
1636 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1637 if (err == -1)
1638 pr_warning("threads[%d]: failed to notify on start: %s\n",
1639 thread->tid, strerror(errno));
1640
1641 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1642
1643 pollfd = &thread->pollfd;
1644 ctlfd_pos = thread->ctlfd_pos;
1645
1646 for (;;) {
1647 unsigned long long hits = thread->samples;
1648
1649 if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1650 break;
1651
1652 if (hits == thread->samples) {
1653
1654 err = fdarray__poll(pollfd, -1);
1655 /*
1656 * Propagate the error only if there is one. Ignore a positive
1657 * number of returned events and interrupt errors.
1658 */
1659 if (err > 0 || (err < 0 && errno == EINTR))
1660 err = 0;
1661 thread->waking++;
1662
1663 if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1664 record__thread_munmap_filtered, NULL) == 0)
1665 break;
1666 }
1667
1668 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1669 terminate = true;
1670 close(thread->pipes.msg[0]);
1671 thread->pipes.msg[0] = -1;
1672 pollfd->entries[ctlfd_pos].fd = -1;
1673 pollfd->entries[ctlfd_pos].events = 0;
1674 }
1675
1676 pollfd->entries[ctlfd_pos].revents = 0;
1677 }
1678 record__mmap_read_all(thread->rec, true);
1679
1680 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1681 if (err == -1)
1682 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1683 thread->tid, strerror(errno));
1684
1685 return NULL;
1686 }
1687
1688 static void record__init_features(struct record *rec)
1689 {
1690 struct perf_session *session = rec->session;
1691 int feat;
1692
1693 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1694 perf_header__set_feat(&session->header, feat);
1695
1696 if (rec->no_buildid)
1697 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1698
1699 #ifdef HAVE_LIBTRACEEVENT
1700 if (!have_tracepoints(&rec->evlist->core.entries))
1701 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1702 #endif
1703
1704 if (!rec->opts.branch_stack)
1705 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1706
1707 if (!rec->opts.full_auxtrace)
1708 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1709
1710 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1711 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1712
1713 if (!rec->opts.use_clockid)
1714 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1715
1716 if (!record__threads_enabled(rec))
1717 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1718
1719 if (!record__comp_enabled(rec))
1720 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1721
1722 perf_header__clear_feat(&session->header, HEADER_STAT);
1723 }
1724
1725 static void
1726 record__finish_output(struct record *rec)
1727 {
1728 int i;
1729 struct perf_data *data = &rec->data;
1730 int fd = perf_data__fd(data);
1731
1732 if (data->is_pipe)
1733 return;
1734
1735 rec->session->header.data_size += rec->bytes_written;
1736 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1737 if (record__threads_enabled(rec)) {
1738 for (i = 0; i < data->dir.nr; i++)
1739 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1740 }
1741
1742 if (!rec->no_buildid) {
1743 process_buildids(rec);
1744
1745 if (rec->buildid_all)
1746 dsos__hit_all(rec->session);
1747 }
1748 perf_session__write_header(rec->session, rec->evlist, fd, true);
1749
1750 return;
1751 }
1752
1753 static int record__synthesize_workload(struct record *rec, bool tail)
1754 {
1755 int err;
1756 struct perf_thread_map *thread_map;
1757 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1758
1759 if (rec->opts.tail_synthesize != tail)
1760 return 0;
1761
1762 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1763 if (thread_map == NULL)
1764 return -1;
1765
1766 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1767 process_synthesized_event,
1768 &rec->session->machines.host,
1769 needs_mmap,
1770 rec->opts.sample_address);
1771 perf_thread_map__put(thread_map);
1772 return err;
1773 }
1774
1775 static int write_finished_init(struct record *rec, bool tail)
1776 {
1777 if (rec->opts.tail_synthesize != tail)
1778 return 0;
1779
1780 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1781 }
1782
1783 static int record__synthesize(struct record *rec, bool tail);
1784
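/*
 * Finish the current output file (including tail synthesis), switch
 * perf.data to a new timestamp-suffixed file, rotate the filename ring
 * when a file limit is configured, and re-synthesize tracking events for
 * the new file unless this is the final switch at exit.
 */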
1785 static int
1786 record__switch_output(struct record *rec, bool at_exit)
1787 {
1788 struct perf_data *data = &rec->data;
1789 int fd, err;
1790 char *new_filename;
1791
1792 /* Same size as "2015122520103046" */
1793 char timestamp[] = "InvalidTimestamp";
1794
1795 record__aio_mmap_read_sync(rec);
1796
1797 write_finished_init(rec, true);
1798
1799 record__synthesize(rec, true);
1800 if (target__none(&rec->opts.target))
1801 record__synthesize_workload(rec, true);
1802
1803 rec->samples = 0;
1804 record__finish_output(rec);
1805 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1806 if (err) {
1807 pr_err("Failed to get current timestamp\n");
1808 return -EINVAL;
1809 }
1810
1811 fd = perf_data__switch(data, timestamp,
1812 rec->session->header.data_offset,
1813 at_exit, &new_filename);
1814 if (fd >= 0 && !at_exit) {
1815 rec->bytes_written = 0;
1816 rec->session->header.data_size = 0;
1817 }
1818
1819 if (!quiet)
1820 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1821 data->path, timestamp);
1822
1823 if (rec->switch_output.num_files) {
1824 int n = rec->switch_output.cur_file + 1;
1825
1826 if (n >= rec->switch_output.num_files)
1827 n = 0;
1828 rec->switch_output.cur_file = n;
1829 if (rec->switch_output.filenames[n]) {
1830 remove(rec->switch_output.filenames[n]);
1831 zfree(&rec->switch_output.filenames[n]);
1832 }
1833 rec->switch_output.filenames[n] = new_filename;
1834 } else {
1835 free(new_filename);
1836 }
1837
1838 /* Output tracking events */
1839 if (!at_exit) {
1840 record__synthesize(rec, false);
1841
1842 /*
1843 * In 'perf record --switch-output' without -a,
1844 * record__synthesize() in record__switch_output() won't
1845 * generate tracking events because there's no thread_map
1846 * in the evlist, which means the newly created perf.data won't
1847 * contain map and comm information.
1848 * Create a fake thread_map and directly call
1849 * perf_event__synthesize_thread_map() for those events.
1850 */
1851 if (target__none(&rec->opts.target))
1852 record__synthesize_workload(rec, false);
1853 write_finished_init(rec, false);
1854 }
1855 return fd;
1856 }
1857
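/*
 * Fill in and write one PERF_RECORD_LOST_SAMPLES record, appending an
 * id sample so that the report side can attribute it to the right event.
 */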
1858 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1859 struct perf_record_lost_samples *lost,
1860 int cpu_idx, int thread_idx, u64 lost_count,
1861 u16 misc_flag)
1862 {
1863 struct perf_sample_id *sid;
1864 struct perf_sample sample = {};
1865 int id_hdr_size;
1866
1867 lost->lost = lost_count;
1868 if (evsel->core.ids) {
1869 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1870 sample.id = sid->id;
1871 }
1872
1873 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1874 evsel->core.attr.sample_type, &sample);
1875 lost->header.size = sizeof(*lost) + id_hdr_size;
1876 lost->header.misc = misc_flag;
1877 record__write(rec, NULL, lost, lost->header.size);
1878 }
1879
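/*
 * At the end of the record session, read the per-fd lost-sample counts
 * from the kernel (and from the BPF filter, if used) and emit
 * PERF_RECORD_LOST_SAMPLES records for any non-zero count.
 */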
1880 static void record__read_lost_samples(struct record *rec)
1881 {
1882 struct perf_session *session = rec->session;
1883 struct perf_record_lost_samples *lost;
1884 struct evsel *evsel;
1885
1886 /* there was an error during record__open */
1887 if (session->evlist == NULL)
1888 return;
1889
1890 lost = zalloc(PERF_SAMPLE_MAX_SIZE);
1891 if (lost == NULL) {
1892 pr_debug("Memory allocation failed\n");
1893 return;
1894 }
1895
1896 lost->header.type = PERF_RECORD_LOST_SAMPLES;
1897
1898 evlist__for_each_entry(session->evlist, evsel) {
1899 struct xyarray *xy = evsel->core.sample_id;
1900 u64 lost_count;
1901
1902 if (xy == NULL || evsel->core.fd == NULL)
1903 continue;
1904 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1905 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1906 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1907 continue;
1908 }
1909
1910 for (int x = 0; x < xyarray__max_x(xy); x++) {
1911 for (int y = 0; y < xyarray__max_y(xy); y++) {
1912 struct perf_counts_values count;
1913
1914 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1915 pr_debug("read LOST count failed\n");
1916 goto out;
1917 }
1918
1919 if (count.lost) {
1920 __record__save_lost_samples(rec, evsel, lost,
1921 x, y, count.lost, 0);
1922 }
1923 }
1924 }
1925
1926 lost_count = perf_bpf_filter__lost_count(evsel);
1927 if (lost_count)
1928 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
1929 PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1930 }
1931 out:
1932 free(lost);
1933 }
1934
1935 static volatile sig_atomic_t workload_exec_errno;
1936
1937 /*
1938 * evlist__prepare_workload will send a SIGUSR1
1939 * if the fork fails, since we asked for it by setting its
1940 * want_signal to true.
1941 */
1942 static void workload_exec_failed_signal(int signo __maybe_unused,
1943 siginfo_t *info,
1944 void *ucontext __maybe_unused)
1945 {
1946 workload_exec_errno = info->si_value.sival_int;
1947 done = 1;
1948 child_finished = 1;
1949 }
1950
1951 static void snapshot_sig_handler(int sig);
1952 static void alarm_sig_handler(int sig);
1953
1954 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1955 {
1956 if (evlist) {
1957 if (evlist->mmap && evlist->mmap[0].core.base)
1958 return evlist->mmap[0].core.base;
1959 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1960 return evlist->overwrite_mmap[0].core.base;
1961 }
1962 return NULL;
1963 }
1964
1965 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1966 {
1967 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1968 if (pc)
1969 return pc;
1970 return NULL;
1971 }
1972
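/*
 * Synthesize the non-sample events that describe the system state:
 * time conversion, id index, auxtrace info, kernel and module mmaps,
 * extra attributes, thread/cpu maps, BPF and cgroup events, and the
 * already-running tasks. Task synthesis can run multi-threaded via
 * --num-thread-synthesize.
 */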
1973 static int record__synthesize(struct record *rec, bool tail)
1974 {
1975 struct perf_session *session = rec->session;
1976 struct machine *machine = &session->machines.host;
1977 struct perf_data *data = &rec->data;
1978 struct record_opts *opts = &rec->opts;
1979 struct perf_tool *tool = &rec->tool;
1980 int err = 0;
1981 event_op f = process_synthesized_event;
1982
1983 if (rec->opts.tail_synthesize != tail)
1984 return 0;
1985
1986 if (data->is_pipe) {
1987 err = perf_event__synthesize_for_pipe(tool, session, data,
1988 process_synthesized_event);
1989 if (err < 0)
1990 goto out;
1991
1992 rec->bytes_written += err;
1993 }
1994
1995 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1996 process_synthesized_event, machine);
1997 if (err)
1998 goto out;
1999
2000 /* Synthesize id_index before auxtrace_info */
2001 err = perf_event__synthesize_id_index(tool,
2002 process_synthesized_event,
2003 session->evlist, machine);
2004 if (err)
2005 goto out;
2006
2007 if (rec->opts.full_auxtrace) {
2008 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2009 session, process_synthesized_event);
2010 if (err)
2011 goto out;
2012 }
2013
2014 if (!evlist__exclude_kernel(rec->evlist)) {
2015 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2016 machine);
2017 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2018 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2019 "Check /proc/kallsyms permission or run as root.\n");
2020
2021 err = perf_event__synthesize_modules(tool, process_synthesized_event,
2022 machine);
2023 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2024 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2025 "Check /proc/modules permission or run as root.\n");
2026 }
2027
2028 if (perf_guest) {
2029 machines__process_guests(&session->machines,
2030 perf_event__synthesize_guest_os, tool);
2031 }
2032
2033 err = perf_event__synthesize_extra_attr(&rec->tool,
2034 rec->evlist,
2035 process_synthesized_event,
2036 data->is_pipe);
2037 if (err)
2038 goto out;
2039
2040 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2041 process_synthesized_event,
2042 NULL);
2043 if (err < 0) {
2044 pr_err("Couldn't synthesize thread map.\n");
2045 return err;
2046 }
2047
2048 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2049 process_synthesized_event, NULL);
2050 if (err < 0) {
2051 pr_err("Couldn't synthesize cpu map.\n");
2052 return err;
2053 }
2054
2055 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2056 machine, opts);
2057 if (err < 0) {
2058 pr_warning("Couldn't synthesize bpf events.\n");
2059 err = 0;
2060 }
2061
2062 if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2063 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2064 machine);
2065 if (err < 0) {
2066 pr_warning("Couldn't synthesize cgroup events.\n");
2067 err = 0;
2068 }
2069 }
2070
2071 if (rec->opts.nr_threads_synthesize > 1) {
2072 mutex_init(&synth_lock);
2073 perf_set_multithreaded();
2074 f = process_locked_synthesized_event;
2075 }
2076
2077 if (rec->opts.synth & PERF_SYNTH_TASK) {
2078 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2079
2080 err = __machine__synthesize_threads(machine, tool, &opts->target,
2081 rec->evlist->core.threads,
2082 f, needs_mmap, opts->sample_address,
2083 rec->opts.nr_threads_synthesize);
2084 }
2085
2086 if (rec->opts.nr_threads_synthesize > 1) {
2087 perf_set_singlethreaded();
2088 mutex_destroy(&synth_lock);
2089 }
2090
2091 out:
2092 return err;
2093 }
2094
2095 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2096 {
2097 struct record *rec = data;
2098 pthread_kill(rec->thread_id, SIGUSR2);
2099 return 0;
2100 }
2101
2102 static int record__setup_sb_evlist(struct record *rec)
2103 {
2104 struct record_opts *opts = &rec->opts;
2105
2106 if (rec->sb_evlist != NULL) {
2107 /*
2108 * We get here if --switch-output-event populated the
2109 * sb_evlist, so associate a callback that will send a SIGUSR2
2110 * to the main thread.
2111 */
2112 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2113 rec->thread_id = pthread_self();
2114 }
2115 #ifdef HAVE_LIBBPF_SUPPORT
2116 if (!opts->no_bpf_event) {
2117 if (rec->sb_evlist == NULL) {
2118 rec->sb_evlist = evlist__new();
2119
2120 if (rec->sb_evlist == NULL) {
2121 pr_err("Couldn't create side band evlist.\n.");
2122 return -1;
2123 }
2124 }
2125
2126 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2127 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2128 return -1;
2129 }
2130 }
2131 #endif
2132 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2133 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2134 opts->no_bpf_event = true;
2135 }
2136
2137 return 0;
2138 }
2139
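/*
 * When -k/--clockid is used, record a pair of reference timestamps
 * (wall clock via gettimeofday() and the selected clockid) in the
 * header so that perf timestamps can later be converted to time of day.
 */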
2140 static int record__init_clock(struct record *rec)
2141 {
2142 struct perf_session *session = rec->session;
2143 struct timespec ref_clockid;
2144 struct timeval ref_tod;
2145 u64 ref;
2146
2147 if (!rec->opts.use_clockid)
2148 return 0;
2149
2150 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2151 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2152
2153 session->header.env.clock.clockid = rec->opts.clockid;
2154
2155 if (gettimeofday(&ref_tod, NULL) != 0) {
2156 pr_err("gettimeofday failed, cannot set reference time.\n");
2157 return -1;
2158 }
2159
2160 if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2161 pr_err("clock_gettime failed, cannot set reference time.\n");
2162 return -1;
2163 }
2164
2165 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2166 (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2167
2168 session->header.env.clock.tod_ns = ref;
2169
2170 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2171 (u64) ref_clockid.tv_nsec;
2172
2173 session->header.env.clock.clockid_ns = ref;
2174 return 0;
2175 }
2176
2177 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2178 {
2179 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2180 trigger_hit(&auxtrace_snapshot_trigger);
2181 auxtrace_record__snapshot_started = 1;
2182 if (auxtrace_record__snapshot_start(rec->itr))
2183 trigger_error(&auxtrace_snapshot_trigger);
2184 }
2185 }
2186
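/*
 * On hybrid systems with more than one core PMU, qualify plain event
 * names with their PMU so they remain distinguishable; for example a
 * bare "cycles" might become "cpu_core/cycles/" (PMU name here is only
 * illustrative).
 */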
2187 static void record__uniquify_name(struct record *rec)
2188 {
2189 struct evsel *pos;
2190 struct evlist *evlist = rec->evlist;
2191 char *new_name;
2192 int ret;
2193
2194 if (perf_pmus__num_core_pmus() == 1)
2195 return;
2196
2197 evlist__for_each_entry(evlist, pos) {
2198 if (!evsel__is_hybrid(pos))
2199 continue;
2200
2201 if (strchr(pos->name, '/'))
2202 continue;
2203
2204 ret = asprintf(&new_name, "%s/%s/",
2205 pos->pmu_name, pos->name);
2206 if (ret > 0) {
2207 free(pos->name);
2208 pos->name = new_name;
2209 }
2210 }
2211 }
2212
2213 static int record__terminate_thread(struct record_thread *thread_data)
2214 {
2215 int err;
2216 enum thread_msg ack = THREAD_MSG__UNDEFINED;
2217 pid_t tid = thread_data->tid;
2218
2219 close(thread_data->pipes.msg[1]);
2220 thread_data->pipes.msg[1] = -1;
2221 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2222 if (err > 0)
2223 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2224 else
2225 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2226 thread->tid, tid);
2227
2228 return 0;
2229 }
2230
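/*
 * Start the per-thread trace readers with all signals blocked, pin each
 * one to its configured affinity mask where supported, and wait for a
 * READY message on its ack pipe before continuing.
 */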
2231 static int record__start_threads(struct record *rec)
2232 {
2233 int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2234 struct record_thread *thread_data = rec->thread_data;
2235 sigset_t full, mask;
2236 pthread_t handle;
2237 pthread_attr_t attrs;
2238
2239 thread = &thread_data[0];
2240
2241 if (!record__threads_enabled(rec))
2242 return 0;
2243
2244 sigfillset(&full);
2245 if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2246 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2247 return -1;
2248 }
2249
2250 pthread_attr_init(&attrs);
2251 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2252
2253 for (t = 1; t < nr_threads; t++) {
2254 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2255
2256 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2257 pthread_attr_setaffinity_np(&attrs,
2258 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2259 (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2260 #endif
2261 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2262 for (tt = 1; tt < t; tt++)
2263 record__terminate_thread(&thread_data[tt]);
2264 pr_err("Failed to start threads: %s\n", strerror(errno));
2265 ret = -1;
2266 goto out_err;
2267 }
2268
2269 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2270 if (err > 0)
2271 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2272 thread_msg_tags[msg]);
2273 else
2274 pr_warning("threads[%d]: failed to receive start notification from %d\n",
2275 thread->tid, rec->thread_data[t].tid);
2276 }
2277
2278 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2279 (cpu_set_t *)thread->mask->affinity.bits);
2280
2281 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2282
2283 out_err:
2284 pthread_attr_destroy(&attrs);
2285
2286 if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2287 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2288 ret = -1;
2289 }
2290
2291 return ret;
2292 }
2293
2294 static int record__stop_threads(struct record *rec)
2295 {
2296 int t;
2297 struct record_thread *thread_data = rec->thread_data;
2298
2299 for (t = 1; t < rec->nr_threads; t++)
2300 record__terminate_thread(&thread_data[t]);
2301
2302 for (t = 0; t < rec->nr_threads; t++) {
2303 rec->samples += thread_data[t].samples;
2304 if (!record__threads_enabled(rec))
2305 continue;
2306 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2307 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2308 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2309 thread_data[t].samples, thread_data[t].waking);
2310 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2311 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2312 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2313 else
2314 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2315 }
2316
2317 return 0;
2318 }
2319
2320 static unsigned long record__waking(struct record *rec)
2321 {
2322 int t;
2323 unsigned long waking = 0;
2324 struct record_thread *thread_data = rec->thread_data;
2325
2326 for (t = 0; t < rec->nr_threads; t++)
2327 waking += thread_data[t].waking;
2328
2329 return waking;
2330 }
2331
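/*
 * The main body of 'perf record': set up the session and output file,
 * fork the workload if a command was given, open and mmap the events,
 * then poll and drain the ring buffers until the workload exits or the
 * user stops the session.
 */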
2332 static int __cmd_record(struct record *rec, int argc, const char **argv)
2333 {
2334 int err;
2335 int status = 0;
2336 const bool forks = argc > 0;
2337 struct perf_tool *tool = &rec->tool;
2338 struct record_opts *opts = &rec->opts;
2339 struct perf_data *data = &rec->data;
2340 struct perf_session *session;
2341 bool disabled = false, draining = false;
2342 int fd;
2343 float ratio = 0;
2344 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2345
2346 atexit(record__sig_exit);
2347 signal(SIGCHLD, sig_handler);
2348 signal(SIGINT, sig_handler);
2349 signal(SIGTERM, sig_handler);
2350 signal(SIGSEGV, sigsegv_handler);
2351
2352 if (rec->opts.record_namespaces)
2353 tool->namespace_events = true;
2354
2355 if (rec->opts.record_cgroup) {
2356 #ifdef HAVE_FILE_HANDLE
2357 tool->cgroup_events = true;
2358 #else
2359 pr_err("cgroup tracking is not supported\n");
2360 return -1;
2361 #endif
2362 }
2363
2364 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2365 signal(SIGUSR2, snapshot_sig_handler);
2366 if (rec->opts.auxtrace_snapshot_mode)
2367 trigger_on(&auxtrace_snapshot_trigger);
2368 if (rec->switch_output.enabled)
2369 trigger_on(&switch_output_trigger);
2370 } else {
2371 signal(SIGUSR2, SIG_IGN);
2372 }
2373
2374 session = perf_session__new(data, tool);
2375 if (IS_ERR(session)) {
2376 pr_err("Perf session creation failed.\n");
2377 return PTR_ERR(session);
2378 }
2379
2380 if (record__threads_enabled(rec)) {
2381 if (perf_data__is_pipe(&rec->data)) {
2382 pr_err("Parallel trace streaming is not available in pipe mode.\n");
2383 return -1;
2384 }
2385 if (rec->opts.full_auxtrace) {
2386 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2387 return -1;
2388 }
2389 }
2390
2391 fd = perf_data__fd(data);
2392 rec->session = session;
2393
2394 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2395 pr_err("Compression initialization failed.\n");
2396 return -1;
2397 }
2398 #ifdef HAVE_EVENTFD_SUPPORT
2399 done_fd = eventfd(0, EFD_NONBLOCK);
2400 if (done_fd < 0) {
2401 pr_err("Failed to create wakeup eventfd, error: %m\n");
2402 status = -1;
2403 goto out_delete_session;
2404 }
2405 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2406 if (err < 0) {
2407 pr_err("Failed to add wakeup eventfd to poll list\n");
2408 status = err;
2409 goto out_delete_session;
2410 }
2411 #endif // HAVE_EVENTFD_SUPPORT
2412
2413 session->header.env.comp_type = PERF_COMP_ZSTD;
2414 session->header.env.comp_level = rec->opts.comp_level;
2415
2416 if (rec->opts.kcore &&
2417 !record__kcore_readable(&session->machines.host)) {
2418 pr_err("ERROR: kcore is not readable.\n");
2419 return -1;
2420 }
2421
2422 if (record__init_clock(rec))
2423 return -1;
2424
2425 record__init_features(rec);
2426
2427 if (forks) {
2428 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2429 workload_exec_failed_signal);
2430 if (err < 0) {
2431 pr_err("Couldn't run the workload!\n");
2432 status = err;
2433 goto out_delete_session;
2434 }
2435 }
2436
2437 /*
2438 * If we have just a single event and are sending data
2439 * through a pipe, we need to force the id allocation,
2440 * because we synthesize the event name through the pipe
2441 * and need the id for that.
2442 */
2443 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2444 rec->opts.sample_id = true;
2445
2446 record__uniquify_name(rec);
2447
2448 /* Debug message used by test scripts */
2449 pr_debug3("perf record opening and mmapping events\n");
2450 if (record__open(rec) != 0) {
2451 err = -1;
2452 goto out_free_threads;
2453 }
2454 /* Debug message used by test scripts */
2455 pr_debug3("perf record done opening and mmapping events\n");
2456 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2457
2458 if (rec->opts.kcore) {
2459 err = record__kcore_copy(&session->machines.host, data);
2460 if (err) {
2461 pr_err("ERROR: Failed to copy kcore\n");
2462 goto out_free_threads;
2463 }
2464 }
2465
2466 /*
2467 * Normally perf_session__new would do this, but it doesn't have the
2468 * evlist.
2469 */
2470 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2471 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2472 rec->tool.ordered_events = false;
2473 }
2474
2475 if (evlist__nr_groups(rec->evlist) == 0)
2476 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2477
2478 if (data->is_pipe) {
2479 err = perf_header__write_pipe(fd);
2480 if (err < 0)
2481 goto out_free_threads;
2482 } else {
2483 err = perf_session__write_header(session, rec->evlist, fd, false);
2484 if (err < 0)
2485 goto out_free_threads;
2486 }
2487
2488 err = -1;
2489 if (!rec->no_buildid
2490 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2491 pr_err("Couldn't generate buildids. "
2492 "Use --no-buildid to profile anyway.\n");
2493 goto out_free_threads;
2494 }
2495
2496 err = record__setup_sb_evlist(rec);
2497 if (err)
2498 goto out_free_threads;
2499
2500 err = record__synthesize(rec, false);
2501 if (err < 0)
2502 goto out_free_threads;
2503
2504 if (rec->realtime_prio) {
2505 struct sched_param param;
2506
2507 param.sched_priority = rec->realtime_prio;
2508 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2509 pr_err("Could not set realtime priority.\n");
2510 err = -1;
2511 goto out_free_threads;
2512 }
2513 }
2514
2515 if (record__start_threads(rec))
2516 goto out_free_threads;
2517
2518 /*
2519 * When perf is starting the traced process, all the events
2520 * (apart from group members) have enable_on_exec=1 set,
2521 * so don't spoil it by prematurely enabling them.
2522 */
2523 if (!target__none(&opts->target) && !opts->target.initial_delay)
2524 evlist__enable(rec->evlist);
2525
2526 /*
2527 * Let the child rip
2528 */
2529 if (forks) {
2530 struct machine *machine = &session->machines.host;
2531 union perf_event *event;
2532 pid_t tgid;
2533
2534 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2535 if (event == NULL) {
2536 err = -ENOMEM;
2537 goto out_child;
2538 }
2539
2540 /*
2541 * Some H/W events are generated before the COMM event,
2542 * which is emitted during exec(), so perf script
2543 * cannot see a correct process name for those events.
2544 * Synthesize a COMM event first to prevent that.
2545 */
2546 tgid = perf_event__synthesize_comm(tool, event,
2547 rec->evlist->workload.pid,
2548 process_synthesized_event,
2549 machine);
2550 free(event);
2551
2552 if (tgid == -1)
2553 goto out_child;
2554
2555 event = malloc(sizeof(event->namespaces) +
2556 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2557 machine->id_hdr_size);
2558 if (event == NULL) {
2559 err = -ENOMEM;
2560 goto out_child;
2561 }
2562
2563 /*
2564 * Synthesize NAMESPACES event for the command specified.
2565 */
2566 perf_event__synthesize_namespaces(tool, event,
2567 rec->evlist->workload.pid,
2568 tgid, process_synthesized_event,
2569 machine);
2570 free(event);
2571
2572 evlist__start_workload(rec->evlist);
2573 }
2574
2575 if (opts->target.initial_delay) {
2576 pr_info(EVLIST_DISABLED_MSG);
2577 if (opts->target.initial_delay > 0) {
2578 usleep(opts->target.initial_delay * USEC_PER_MSEC);
2579 evlist__enable(rec->evlist);
2580 pr_info(EVLIST_ENABLED_MSG);
2581 }
2582 }
2583
2584 err = event_enable_timer__start(rec->evlist->eet);
2585 if (err)
2586 goto out_child;
2587
2588 /* Debug message used by test scripts */
2589 pr_debug3("perf record has started\n");
2590 fflush(stderr);
2591
2592 trigger_ready(&auxtrace_snapshot_trigger);
2593 trigger_ready(&switch_output_trigger);
2594 perf_hooks__invoke_record_start();
2595
2596 /*
2597 * Must write FINISHED_INIT so it will be seen after all other
2598 * synthesized user events, but before any regular events.
2599 */
2600 err = write_finished_init(rec, false);
2601 if (err < 0)
2602 goto out_child;
2603
2604 for (;;) {
2605 unsigned long long hits = thread->samples;
2606
2607 /*
2608 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
2609 * here: when done == true and hits != rec->samples
2610 * in the previous round.
2611 *
2612 * evlist__toggle_bkw_mmap() ensures we never convert
2613 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2614 */
2615 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2616 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2617
2618 if (record__mmap_read_all(rec, false) < 0) {
2619 trigger_error(&auxtrace_snapshot_trigger);
2620 trigger_error(&switch_output_trigger);
2621 err = -1;
2622 goto out_child;
2623 }
2624
2625 if (auxtrace_record__snapshot_started) {
2626 auxtrace_record__snapshot_started = 0;
2627 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2628 record__read_auxtrace_snapshot(rec, false);
2629 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2630 pr_err("AUX area tracing snapshot failed\n");
2631 err = -1;
2632 goto out_child;
2633 }
2634 }
2635
2636 if (trigger_is_hit(&switch_output_trigger)) {
2637 /*
2638 * If switch_output_trigger is hit, the data in the
2639 * overwritable ring buffer should have been collected,
2640 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2641 *
2642 * If SIGUSR2 was raised after or during record__mmap_read_all(),
2643 * it didn't collect data from the overwritable ring buffer.
2644 * Read again.
2645 */
2646 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2647 continue;
2648 trigger_ready(&switch_output_trigger);
2649
2650 /*
2651 * Reenable events in overwrite ring buffer after
2652 * record__mmap_read_all(): we should have collected
2653 * data from it.
2654 */
2655 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2656
2657 if (!quiet)
2658 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2659 record__waking(rec));
2660 thread->waking = 0;
2661 fd = record__switch_output(rec, false);
2662 if (fd < 0) {
2663 pr_err("Failed to switch to new file\n");
2664 trigger_error(&switch_output_trigger);
2665 err = fd;
2666 goto out_child;
2667 }
2668
2669 /* re-arm the alarm */
2670 if (rec->switch_output.time)
2671 alarm(rec->switch_output.time);
2672 }
2673
2674 if (hits == thread->samples) {
2675 if (done || draining)
2676 break;
2677 err = fdarray__poll(&thread->pollfd, -1);
2678 /*
2679 * Propagate the error only if there is one. Ignore a positive
2680 * number of returned events and an interrupted poll (EINTR).
2681 */
2682 if (err > 0 || (err < 0 && errno == EINTR))
2683 err = 0;
2684 thread->waking++;
2685
2686 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2687 record__thread_munmap_filtered, NULL) == 0)
2688 draining = true;
2689
2690 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2691 if (err)
2692 goto out_child;
2693 }
2694
2695 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2696 switch (cmd) {
2697 case EVLIST_CTL_CMD_SNAPSHOT:
2698 hit_auxtrace_snapshot_trigger(rec);
2699 evlist__ctlfd_ack(rec->evlist);
2700 break;
2701 case EVLIST_CTL_CMD_STOP:
2702 done = 1;
2703 break;
2704 case EVLIST_CTL_CMD_ACK:
2705 case EVLIST_CTL_CMD_UNSUPPORTED:
2706 case EVLIST_CTL_CMD_ENABLE:
2707 case EVLIST_CTL_CMD_DISABLE:
2708 case EVLIST_CTL_CMD_EVLIST:
2709 case EVLIST_CTL_CMD_PING:
2710 default:
2711 break;
2712 }
2713 }
2714
2715 err = event_enable_timer__process(rec->evlist->eet);
2716 if (err < 0)
2717 goto out_child;
2718 if (err) {
2719 err = 0;
2720 done = 1;
2721 }
2722
2723 /*
2724 * When perf is starting the traced process, the events die with
2725 * the process at the end, and we wait for that. Thus there is no
2726 * need to disable the events in this case.
2727 */
2728 if (done && !disabled && !target__none(&opts->target)) {
2729 trigger_off(&auxtrace_snapshot_trigger);
2730 evlist__disable(rec->evlist);
2731 disabled = true;
2732 }
2733 }
2734
2735 trigger_off(&auxtrace_snapshot_trigger);
2736 trigger_off(&switch_output_trigger);
2737
2738 if (opts->auxtrace_snapshot_on_exit)
2739 record__auxtrace_snapshot_exit(rec);
2740
2741 if (forks && workload_exec_errno) {
2742 char msg[STRERR_BUFSIZE], strevsels[2048];
2743 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2744
2745 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2746
2747 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2748 strevsels, argv[0], emsg);
2749 err = -1;
2750 goto out_child;
2751 }
2752
2753 if (!quiet)
2754 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2755 record__waking(rec));
2756
2757 write_finished_init(rec, true);
2758
2759 if (target__none(&rec->opts.target))
2760 record__synthesize_workload(rec, true);
2761
2762 out_child:
2763 record__stop_threads(rec);
2764 record__mmap_read_all(rec, true);
2765 out_free_threads:
2766 record__free_thread_data(rec);
2767 evlist__finalize_ctlfd(rec->evlist);
2768 record__aio_mmap_read_sync(rec);
2769
2770 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2771 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2772 session->header.env.comp_ratio = ratio + 0.5;
2773 }
2774
2775 if (forks) {
2776 int exit_status;
2777
2778 if (!child_finished)
2779 kill(rec->evlist->workload.pid, SIGTERM);
2780
2781 wait(&exit_status);
2782
2783 if (err < 0)
2784 status = err;
2785 else if (WIFEXITED(exit_status))
2786 status = WEXITSTATUS(exit_status);
2787 else if (WIFSIGNALED(exit_status))
2788 signr = WTERMSIG(exit_status);
2789 } else
2790 status = err;
2791
2792 if (rec->off_cpu)
2793 rec->bytes_written += off_cpu_write(rec->session);
2794
2795 record__read_lost_samples(rec);
2796 record__synthesize(rec, true);
2797 /* this will be recalculated during process_buildids() */
2798 rec->samples = 0;
2799
2800 if (!err) {
2801 if (!rec->timestamp_filename) {
2802 record__finish_output(rec);
2803 } else {
2804 fd = record__switch_output(rec, true);
2805 if (fd < 0) {
2806 status = fd;
2807 goto out_delete_session;
2808 }
2809 }
2810 }
2811
2812 perf_hooks__invoke_record_end();
2813
2814 if (!err && !quiet) {
2815 char samples[128];
2816 const char *postfix = rec->timestamp_filename ?
2817 ".<timestamp>" : "";
2818
2819 if (rec->samples && !rec->opts.full_auxtrace)
2820 scnprintf(samples, sizeof(samples),
2821 " (%" PRIu64 " samples)", rec->samples);
2822 else
2823 samples[0] = '\0';
2824
2825 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2826 perf_data__size(data) / 1024.0 / 1024.0,
2827 data->path, postfix, samples);
2828 if (ratio) {
2829 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2830 rec->session->bytes_transferred / 1024.0 / 1024.0,
2831 ratio);
2832 }
2833 fprintf(stderr, " ]\n");
2834 }
2835
2836 out_delete_session:
2837 #ifdef HAVE_EVENTFD_SUPPORT
2838 if (done_fd >= 0) {
2839 fd = done_fd;
2840 done_fd = -1;
2841
2842 close(fd);
2843 }
2844 #endif
2845 zstd_fini(&session->zstd_data);
2846 perf_session__delete(session);
2847
2848 if (!opts->no_bpf_event)
2849 evlist__stop_sb_thread(rec->sb_evlist);
2850 return status;
2851 }
2852
2853 static void callchain_debug(struct callchain_param *callchain)
2854 {
2855 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2856
2857 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2858
2859 if (callchain->record_mode == CALLCHAIN_DWARF)
2860 pr_debug("callchain: stack dump size %d\n",
2861 callchain->dump_size);
2862 }
2863
2864 int record_opts__parse_callchain(struct record_opts *record,
2865 struct callchain_param *callchain,
2866 const char *arg, bool unset)
2867 {
2868 int ret;
2869 callchain->enabled = !unset;
2870
2871 /* --no-call-graph */
2872 if (unset) {
2873 callchain->record_mode = CALLCHAIN_NONE;
2874 pr_debug("callchain: disabled\n");
2875 return 0;
2876 }
2877
2878 ret = parse_callchain_record_opt(arg, callchain);
2879 if (!ret) {
2880 /* Enable data address sampling for DWARF unwind. */
2881 if (callchain->record_mode == CALLCHAIN_DWARF)
2882 record->sample_address = true;
2883 callchain_debug(callchain);
2884 }
2885
2886 return ret;
2887 }
2888
2889 int record_parse_callchain_opt(const struct option *opt,
2890 const char *arg,
2891 int unset)
2892 {
2893 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2894 }
2895
2896 int record_callchain_opt(const struct option *opt,
2897 const char *arg __maybe_unused,
2898 int unset __maybe_unused)
2899 {
2900 struct callchain_param *callchain = opt->value;
2901
2902 callchain->enabled = true;
2903
2904 if (callchain->record_mode == CALLCHAIN_NONE)
2905 callchain->record_mode = CALLCHAIN_FP;
2906
2907 callchain_debug(callchain);
2908 return 0;
2909 }
2910
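/*
 * Handle perfconfig keys specific to record: record.build-id,
 * record.call-graph, record.aio and record.debuginfod. For example,
 * setting record.build-id=no-cache (illustrative) maps to
 * rec->no_buildid_cache = true below.
 */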
2911 static int perf_record_config(const char *var, const char *value, void *cb)
2912 {
2913 struct record *rec = cb;
2914
2915 if (!strcmp(var, "record.build-id")) {
2916 if (!strcmp(value, "cache"))
2917 rec->no_buildid_cache = false;
2918 else if (!strcmp(value, "no-cache"))
2919 rec->no_buildid_cache = true;
2920 else if (!strcmp(value, "skip"))
2921 rec->no_buildid = true;
2922 else if (!strcmp(value, "mmap"))
2923 rec->buildid_mmap = true;
2924 else
2925 return -1;
2926 return 0;
2927 }
2928 if (!strcmp(var, "record.call-graph")) {
2929 var = "call-graph.record-mode";
2930 return perf_default_config(var, value, cb);
2931 }
2932 #ifdef HAVE_AIO_SUPPORT
2933 if (!strcmp(var, "record.aio")) {
2934 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2935 if (!rec->opts.nr_cblocks)
2936 rec->opts.nr_cblocks = nr_cblocks_default;
2937 }
2938 #endif
2939 if (!strcmp(var, "record.debuginfod")) {
2940 rec->debuginfod.urls = strdup(value);
2941 if (!rec->debuginfod.urls)
2942 return -ENOMEM;
2943 rec->debuginfod.set = true;
2944 }
2945
2946 return 0;
2947 }
2948
2949 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2950 {
2951 struct record *rec = (struct record *)opt->value;
2952
2953 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2954 }
2955
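/*
 * Parse --affinity: "node" pins the trace reading thread to the NUMA
 * node of the mmap buffer being processed, "cpu" to that buffer's CPU.
 */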
2956 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2957 {
2958 struct record_opts *opts = (struct record_opts *)opt->value;
2959
2960 if (unset || !str)
2961 return 0;
2962
2963 if (!strcasecmp(str, "node"))
2964 opts->affinity = PERF_AFFINITY_NODE;
2965 else if (!strcasecmp(str, "cpu"))
2966 opts->affinity = PERF_AFFINITY_CPU;
2967
2968 return 0;
2969 }
2970
2971 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2972 {
2973 mask->nbits = nr_bits;
2974 mask->bits = bitmap_zalloc(mask->nbits);
2975 if (!mask->bits)
2976 return -ENOMEM;
2977
2978 return 0;
2979 }
2980
2981 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2982 {
2983 bitmap_free(mask->bits);
2984 mask->nbits = 0;
2985 }
2986
2987 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2988 {
2989 int ret;
2990
2991 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2992 if (ret) {
2993 mask->affinity.bits = NULL;
2994 return ret;
2995 }
2996
2997 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2998 if (ret) {
2999 record__mmap_cpu_mask_free(&mask->maps);
3000 mask->maps.bits = NULL;
3001 }
3002
3003 return ret;
3004 }
3005
3006 static void record__thread_mask_free(struct thread_mask *mask)
3007 {
3008 record__mmap_cpu_mask_free(&mask->maps);
3009 record__mmap_cpu_mask_free(&mask->affinity);
3010 }
3011
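/*
 * Parse --threads. With no argument one thread per CPU is used; the
 * argument may also name a topology level (core, package, numa) or give
 * a user-defined mask spec, e.g. --threads=numa (illustrative).
 */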
3012 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3013 {
3014 int s;
3015 struct record_opts *opts = opt->value;
3016
3017 if (unset || !str || !strlen(str)) {
3018 opts->threads_spec = THREAD_SPEC__CPU;
3019 } else {
3020 for (s = 1; s < THREAD_SPEC__MAX; s++) {
3021 if (s == THREAD_SPEC__USER) {
3022 opts->threads_user_spec = strdup(str);
3023 if (!opts->threads_user_spec)
3024 return -ENOMEM;
3025 opts->threads_spec = THREAD_SPEC__USER;
3026 break;
3027 }
3028 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3029 opts->threads_spec = s;
3030 break;
3031 }
3032 }
3033 }
3034
3035 if (opts->threads_spec == THREAD_SPEC__USER)
3036 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3037 else
3038 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3039
3040 return 0;
3041 }
3042
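/*
 * Parse --max-size with an optional B/K/M/G suffix, e.g. --max-size=2G
 * (value here is only an example).
 */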
3043 static int parse_output_max_size(const struct option *opt,
3044 const char *str, int unset)
3045 {
3046 unsigned long *s = (unsigned long *)opt->value;
3047 static struct parse_tag tags_size[] = {
3048 { .tag = 'B', .mult = 1 },
3049 { .tag = 'K', .mult = 1 << 10 },
3050 { .tag = 'M', .mult = 1 << 20 },
3051 { .tag = 'G', .mult = 1 << 30 },
3052 { .tag = 0 },
3053 };
3054 unsigned long val;
3055
3056 if (unset) {
3057 *s = 0;
3058 return 0;
3059 }
3060
3061 val = parse_tag_value(str, tags_size);
3062 if (val != (unsigned long) -1) {
3063 *s = val;
3064 return 0;
3065 }
3066
3067 return -1;
3068 }
3069
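/*
 * Parse -m/--mmap-pages. The value may have two comma-separated parts:
 * data mmap pages and AUX area mmap pages, e.g. "-m 512,128"
 * (example values).
 */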
3070 static int record__parse_mmap_pages(const struct option *opt,
3071 const char *str,
3072 int unset __maybe_unused)
3073 {
3074 struct record_opts *opts = opt->value;
3075 char *s, *p;
3076 unsigned int mmap_pages;
3077 int ret;
3078
3079 if (!str)
3080 return -EINVAL;
3081
3082 s = strdup(str);
3083 if (!s)
3084 return -ENOMEM;
3085
3086 p = strchr(s, ',');
3087 if (p)
3088 *p = '\0';
3089
3090 if (*s) {
3091 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3092 if (ret)
3093 goto out_free;
3094 opts->mmap_pages = mmap_pages;
3095 }
3096
3097 if (!p) {
3098 ret = 0;
3099 goto out_free;
3100 }
3101
3102 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3103 if (ret)
3104 goto out_free;
3105
3106 opts->auxtrace_mmap_pages = mmap_pages;
3107
3108 out_free:
3109 free(s);
3110 return ret;
3111 }
3112
3113 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3114 {
3115 }
3116
3117 static int parse_control_option(const struct option *opt,
3118 const char *str,
3119 int unset __maybe_unused)
3120 {
3121 struct record_opts *opts = opt->value;
3122
3123 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3124 }
3125
3126 static void switch_output_size_warn(struct record *rec)
3127 {
3128 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3129 struct switch_output *s = &rec->switch_output;
3130
3131 wakeup_size /= 2;
3132
3133 if (s->size < wakeup_size) {
3134 char buf[100];
3135
3136 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3137 pr_warning("WARNING: switch-output data size lower than "
3138 "wakeup kernel buffer size (%s) "
3139 "expect bigger perf.data sizes\n", buf);
3140 }
3141 }
3142
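/*
 * Configure --switch-output. The argument is either "signal", a size
 * threshold such as "100M", or a time threshold such as "30s" (example
 * values); any of them implies timestamped output filenames.
 */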
3143 static int switch_output_setup(struct record *rec)
3144 {
3145 struct switch_output *s = &rec->switch_output;
3146 static struct parse_tag tags_size[] = {
3147 { .tag = 'B', .mult = 1 },
3148 { .tag = 'K', .mult = 1 << 10 },
3149 { .tag = 'M', .mult = 1 << 20 },
3150 { .tag = 'G', .mult = 1 << 30 },
3151 { .tag = 0 },
3152 };
3153 static struct parse_tag tags_time[] = {
3154 { .tag = 's', .mult = 1 },
3155 { .tag = 'm', .mult = 60 },
3156 { .tag = 'h', .mult = 60*60 },
3157 { .tag = 'd', .mult = 60*60*24 },
3158 { .tag = 0 },
3159 };
3160 unsigned long val;
3161
3162 /*
3163 * If we're using --switch-output-event, then we imply
3164 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3165 * thread to its parent.
3166 */
3167 if (rec->switch_output_event_set) {
3168 if (record__threads_enabled(rec)) {
3169 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3170 return 0;
3171 }
3172 goto do_signal;
3173 }
3174
3175 if (!s->set)
3176 return 0;
3177
3178 if (record__threads_enabled(rec)) {
3179 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3180 return 0;
3181 }
3182
3183 if (!strcmp(s->str, "signal")) {
3184 do_signal:
3185 s->signal = true;
3186 pr_debug("switch-output with SIGUSR2 signal\n");
3187 goto enabled;
3188 }
3189
3190 val = parse_tag_value(s->str, tags_size);
3191 if (val != (unsigned long) -1) {
3192 s->size = val;
3193 pr_debug("switch-output with %s size threshold\n", s->str);
3194 goto enabled;
3195 }
3196
3197 val = parse_tag_value(s->str, tags_time);
3198 if (val != (unsigned long) -1) {
3199 s->time = val;
3200 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3201 s->str, s->time);
3202 goto enabled;
3203 }
3204
3205 return -1;
3206
3207 enabled:
3208 rec->timestamp_filename = true;
3209 s->enabled = true;
3210
3211 if (s->size && !rec->opts.no_buffering)
3212 switch_output_size_warn(rec);
3213
3214 return 0;
3215 }
3216
3217 static const char * const __record_usage[] = {
3218 "perf record [<options>] [<command>]",
3219 "perf record [<options>] -- <command> [<options>]",
3220 NULL
3221 };
3222 const char * const *record_usage = __record_usage;
3223
3224 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3225 struct perf_sample *sample, struct machine *machine)
3226 {
3227 /*
3228 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3229 * so there is no need to add them twice.
3230 */
3231 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3232 return 0;
3233 return perf_event__process_mmap(tool, event, sample, machine);
3234 }
3235
3236 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3237 struct perf_sample *sample, struct machine *machine)
3238 {
3239 /*
3240 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3241 * so there is no need to add them twice.
3242 */
3243 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3244 return 0;
3245
3246 return perf_event__process_mmap2(tool, event, sample, machine);
3247 }
3248
3249 static int process_timestamp_boundary(struct perf_tool *tool,
3250 union perf_event *event __maybe_unused,
3251 struct perf_sample *sample,
3252 struct machine *machine __maybe_unused)
3253 {
3254 struct record *rec = container_of(tool, struct record, tool);
3255
3256 set_timestamp_boundary(rec, sample->time);
3257 return 0;
3258 }
3259
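/*
 * Parse --synth, which restricts which non-sample events get
 * synthesized: no, all, task, mmap or cgroup.
 */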
3260 static int parse_record_synth_option(const struct option *opt,
3261 const char *str,
3262 int unset __maybe_unused)
3263 {
3264 struct record_opts *opts = opt->value;
3265 char *p = strdup(str);
3266
3267 if (p == NULL)
3268 return -1;
3269
3270 opts->synth = parse_synth_opt(p);
3271 free(p);
3272
3273 if (opts->synth < 0) {
3274 pr_err("Invalid synth option: %s\n", str);
3275 return -1;
3276 }
3277 return 0;
3278 }
3279
3280 /*
3281 * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
3282 * because we need to have access to it in record__exit(), which is called
3283 * after cmd_record() exits; but since record_options needs to be accessible to
3284 * builtin-script, leave it here.
3285 *
3286 * At least we don't touch it in all the other functions here directly.
3287 *
3288 * Just say no to tons of global variables, sigh.
3289 */
3290 static struct record record = {
3291 .opts = {
3292 .sample_time = true,
3293 .mmap_pages = UINT_MAX,
3294 .user_freq = UINT_MAX,
3295 .user_interval = ULLONG_MAX,
3296 .freq = 4000,
3297 .target = {
3298 .uses_mmap = true,
3299 .default_per_cpu = true,
3300 },
3301 .mmap_flush = MMAP_FLUSH_DEFAULT,
3302 .nr_threads_synthesize = 1,
3303 .ctl_fd = -1,
3304 .ctl_fd_ack = -1,
3305 .synth = PERF_SYNTH_ALL,
3306 },
3307 .tool = {
3308 .sample = process_sample_event,
3309 .fork = perf_event__process_fork,
3310 .exit = perf_event__process_exit,
3311 .comm = perf_event__process_comm,
3312 .namespaces = perf_event__process_namespaces,
3313 .mmap = build_id__process_mmap,
3314 .mmap2 = build_id__process_mmap2,
3315 .itrace_start = process_timestamp_boundary,
3316 .aux = process_timestamp_boundary,
3317 .ordered_events = true,
3318 },
3319 };
3320
3321 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3322 "\n\t\t\t\tDefault: fp";
3323
3324 static bool dry_run;
3325
3326 static struct parse_events_option_args parse_events_option_args = {
3327 .evlistp = &record.evlist,
3328 };
3329
3330 static struct parse_events_option_args switch_output_parse_events_option_args = {
3331 .evlistp = &record.sb_evlist,
3332 };
3333
3334 /*
3335 * XXX Will stay a global variable until we fix builtin-script.c to stop messing
3336 * with it and switch to using the library functions in perf_evlist that came
3337 * from builtin-record.c, i.e. use record_opts,
3338 * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
3339 * using pipes, etc.
3340 */
3341 static struct option __record_options[] = {
3342 OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3343 "event selector. use 'perf list' to list available events",
3344 parse_events_option),
3345 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3346 "event filter", parse_filter),
3347 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3348 NULL, "don't record events from perf itself",
3349 exclude_perf),
3350 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3351 "record events on existing process id"),
3352 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3353 "record events on existing thread id"),
3354 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3355 "collect data with this RT SCHED_FIFO priority"),
3356 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3357 "collect data without buffering"),
3358 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3359 "collect raw sample records from all opened counters"),
3360 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3361 "system-wide collection from all CPUs"),
3362 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3363 "list of cpus to monitor"),
3364 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3365 OPT_STRING('o', "output", &record.data.path, "file",
3366 "output file name"),
3367 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3368 &record.opts.no_inherit_set,
3369 "child tasks do not inherit counters"),
3370 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3371 "synthesize non-sample events at the end of output"),
3372 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3373 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3374 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3375 "Fail if the specified frequency can't be used"),
3376 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3377 "profile at this frequency",
3378 record__parse_freq),
3379 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3380 "number of mmap data pages and AUX area tracing mmap pages",
3381 record__parse_mmap_pages),
3382 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3383 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3384 record__mmap_flush_parse),
3385 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3386 NULL, "enables call-graph recording" ,
3387 &record_callchain_opt),
3388 OPT_CALLBACK(0, "call-graph", &record.opts,
3389 "record_mode[,record_size]", record_callchain_help,
3390 &record_parse_callchain_opt),
3391 OPT_INCR('v', "verbose", &verbose,
3392 "be more verbose (show counter open errors, etc)"),
3393 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3394 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3395 "per thread counts"),
3396 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3397 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3398 "Record the sample physical addresses"),
3399 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3400 "Record the sampled data address data page size"),
3401 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3402 "Record the sampled code address (ip) page size"),
3403 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3404 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3405 "Record the sample identifier"),
3406 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3407 &record.opts.sample_time_set,
3408 "Record the sample timestamps"),
3409 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3410 "Record the sample period"),
3411 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3412 "don't sample"),
3413 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3414 &record.no_buildid_cache_set,
3415 "do not update the buildid cache"),
3416 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3417 &record.no_buildid_set,
3418 "do not collect buildids in perf.data"),
3419 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3420 "monitor event in cgroup name only",
3421 parse_cgroups),
3422 OPT_CALLBACK('D', "delay", &record, "ms",
3423 "ms to wait before starting measurement after program start (-1: start with events disabled), "
3424 "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3425 record__parse_event_enable_time),
3426 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3427 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3428 "user to profile"),
3429
3430 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3431 "branch any", "sample any taken branches",
3432 parse_branch_stack),
3433
3434 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3435 "branch filter mask", "branch stack filter modes",
3436 parse_branch_stack),
3437 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3438 "sample by weight (on special events only)"),
3439 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3440 "sample transaction flags (special events only)"),
3441 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3442 "use per-thread mmaps"),
3443 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3444 "sample selected machine registers on interrupt,"
3445 " use '-I?' to list register names", parse_intr_regs),
3446 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3447 "sample selected machine registers on interrupt,"
3448 " use '--user-regs=?' to list register names", parse_user_regs),
3449 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3450 "Record running/enabled time of read (:S) events"),
3451 OPT_CALLBACK('k', "clockid", &record.opts,
3452 "clockid", "clockid to use for events, see clock_gettime()",
3453 parse_clockid),
3454 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3455 "opts", "AUX area tracing Snapshot Mode", ""),
3456 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3457 "opts", "sample AUX area", ""),
3458 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3459 "per thread proc mmap processing timeout in ms"),
3460 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3461 "Record namespaces events"),
3462 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3463 "Record cgroup events"),
3464 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3465 &record.opts.record_switch_events_set,
3466 "Record context switch events"),
3467 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3468 "Configure all used events to run in kernel space.",
3469 PARSE_OPT_EXCLUSIVE),
3470 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3471 "Configure all used events to run in user space.",
3472 PARSE_OPT_EXCLUSIVE),
3473 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3474 "collect kernel callchains"),
3475 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3476 "collect user callchains"),
3477 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3478 "file", "vmlinux pathname"),
3479 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3480 "Record build-id of all DSOs regardless of hits"),
3481 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3482 "Record build-id in map events"),
3483 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3484 "append timestamp to output filename"),
3485 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3486 "Record timestamp boundary (time of first/last samples)"),
3487 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3488 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3489 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3490 "signal"),
3491 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3492 &record.switch_output_event_set, "switch output event",
3493 "switch output event selector. use 'perf list' to list available events",
3494 parse_events_option_new_evlist),
3495 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3496 "Limit number of switch output generated files"),
3497 OPT_BOOLEAN(0, "dry-run", &dry_run,
3498 "Parse options then exit"),
3499 #ifdef HAVE_AIO_SUPPORT
3500 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3501 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3502 record__aio_parse),
3503 #endif
3504 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3505 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3506 record__parse_affinity),
3507 #ifdef HAVE_ZSTD_SUPPORT
3508 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3509 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3510 record__parse_comp_level),
3511 #endif
3512 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3513 "size", "Limit the maximum size of the output file", parse_output_max_size),
3514 OPT_UINTEGER(0, "num-thread-synthesize",
3515 &record.opts.nr_threads_synthesize,
3516 "number of threads to run for event synthesis"),
3517 #ifdef HAVE_LIBPFM
3518 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3519 "libpfm4 event selector. use 'perf list' to list available events",
3520 parse_libpfm_events_option),
3521 #endif
3522 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3523 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3524 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3525 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3526 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3527 parse_control_option),
3528 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3529 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3530 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3531 &record.debuginfod.set, "debuginfod urls",
3532 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3533 "system"),
3534 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3535 "write collected trace data into several data files using parallel threads",
3536 record__parse_threads),
3537 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3538 OPT_END()
3539 };
3540
3541 struct option *record_options = __record_options;
3542
3543 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3544 {
3545 struct perf_cpu cpu;
3546 int idx;
3547
3548 if (cpu_map__is_dummy(cpus))
3549 return 0;
3550
3551 perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3552 if (cpu.cpu == -1)
3553 continue;
3554 /* Return ENODEV is input cpu is greater than max cpu */
3555 if ((unsigned long)cpu.cpu > mask->nbits)
3556 return -ENODEV;
3557 __set_bit(cpu.cpu, mask->bits);
3558 }
3559
3560 return 0;
3561 }
3562
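/*
 * Initialize @mask from a CPU list string such as "0-3,8". The mask is
 * zeroed first, so the result holds exactly the CPUs named in @mask_spec.
 */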
static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
{
	struct perf_cpu_map *cpus;
	int ret = 0;

	cpus = perf_cpu_map__new(mask_spec);
	if (!cpus)
		return -ENOMEM;

	bitmap_zero(mask->bits, mask->nbits);
	if (record__mmap_cpu_mask_init(mask, cpus))
		ret = -ENODEV;

	/* Drop the map reference on both the success and the error path. */
	perf_cpu_map__put(cpus);

	return ret;
}

static void record__free_thread_masks(struct record *rec, int nr_threads)
{
	int t;

	if (rec->thread_masks)
		for (t = 0; t < nr_threads; t++)
			record__thread_mask_free(&rec->thread_masks[t]);

	zfree(&rec->thread_masks);
}

static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
{
	int t, ret;

	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
	if (!rec->thread_masks) {
		pr_err("Failed to allocate thread masks\n");
		return -ENOMEM;
	}

	for (t = 0; t < nr_threads; t++) {
		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
		if (ret) {
			pr_err("Failed to allocate thread masks[%d]\n", t);
			goto out_free;
		}
	}

	return 0;

out_free:
	record__free_thread_masks(rec, nr_threads);

	return ret;
}

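/*
 * --threads=cpu layout: one writer thread per recorded CPU, with thread t
 * mapping and pinned to the t-th CPU of @cpus.
 */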
static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);

	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	rec->nr_threads = nr_cpus;
	pr_debug("nr_threads: %d\n", rec->nr_threads);

	for (t = 0; t < rec->nr_threads; t++) {
		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
	}

	return 0;
}

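/*
 * Build one thread_mask per maps/affinity spec pair. Each spec is first
 * intersected with the set of recorded CPUs; a spec that ends up empty or
 * that overlaps a previously accepted one is rejected. For example, assuming
 * CPUs 0-3 are recorded, specs "0-1" and "2-3" yield two threads, while
 * "0-1" and "1-2" fail with -EINVAL because CPU 1 would belong to both.
 */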
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	struct thread_mask thread_mask, full_mask, *thread_masks;

	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}

	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
	if (ret) {
		pr_err("Failed to init cpu mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.affinity.nbits);

		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		rec->thread_masks[t] = thread_mask;
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}

static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

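/*
 * One writer thread per NUMA node: each node's CPU list string serves as
 * both the maps spec and the affinity spec for that thread.
 */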
static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

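/*
 * Parse a user supplied --threads spec of the form
 * <maps cpus>/<affinity cpus>[:<maps cpus>/<affinity cpus>...], for example
 * "0-3/0-3:4-7/4-7" for two threads, and hand the collected string lists to
 * record__init_thread_masks_spec().
 */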
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

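/*
 * Without --threads a single mask covering all recorded CPUs is used.
 * Otherwise the layout follows the requested spec: per CPU, per core,
 * per package, per NUMA node, or user supplied maps/affinity pairs.
 */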
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

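/*
 * Entry point for 'perf record': parse and validate options and the target,
 * pick a default event if none was given, configure build-id handling,
 * auxtrace and the writer thread masks, then hand off to __cmd_record().
 */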
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -ENOMEM;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *		--no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *		disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		bool can_profile_kernel = perf_event_paranoid_check(1);

		err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
		if (err)
			goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
			       errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		} else {
			usage_with_options(record_usage, record_options);
		}
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace; decoding would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

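/*
 * snapshot_sig_handler() is wired to SIGUSR2 elsewhere in this file and
 * serves both AUX area snapshot requests and --switch-output=signal;
 * alarm_sig_handler() handles the SIGALRM armed by --switch-output=time
 * in cmd_record() above.
 */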
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}