1 // SPDX-License-Identifier: GPL-2.0
2 #include "debug.h"
3 #include "evlist.h"
4 #include "evsel.h"
5 #include "evsel_config.h"
6 #include "parse-events.h"
7 #include <errno.h>
8 #include <limits.h>
9 #include <stdlib.h>
10 #include <api/fs/fs.h>
11 #include <subcmd/parse-options.h>
12 #include <perf/cpumap.h>
13 #include "cloexec.h"
14 #include "util/perf_api_probe.h"
15 #include "record.h"
16 #include "../perf-sys.h"
17 #include "topdown.h"
18 #include "map_symbol.h"
19 #include "mem-events.h"
20 
21 /*
22  * evsel__config_leader_sampling() uses special rules for leader sampling.
23  * However, if the leader is an AUX area event, then assume the event to sample
24  * is the next event.
25  */
evsel__read_sampler(struct evsel * evsel,struct evlist * evlist)26 static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
27 {
28 	struct evsel *leader = evsel__leader(evsel);
29 
30 	if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
31 	    is_mem_loads_aux_event(leader)) {
32 		evlist__for_each_entry(evlist, evsel) {
33 			if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel))
34 				return evsel;
35 		}
36 	}
37 
38 	return leader;
39 }
40 
evsel__config_term_mask(struct evsel * evsel)41 static u64 evsel__config_term_mask(struct evsel *evsel)
42 {
43 	struct evsel_config_term *term;
44 	struct list_head *config_terms = &evsel->config_terms;
45 	u64 term_types = 0;
46 
47 	list_for_each_entry(term, config_terms, list) {
48 		term_types |= 1 << term->type;
49 	}
50 	return term_types;
51 }
52 
evsel__config_leader_sampling(struct evsel * evsel,struct evlist * evlist)53 static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
54 {
55 	struct perf_event_attr *attr = &evsel->core.attr;
56 	struct evsel *leader = evsel__leader(evsel);
57 	struct evsel *read_sampler;
58 	u64 term_types, freq_mask;
59 
60 	if (!leader->sample_read)
61 		return;
62 
63 	read_sampler = evsel__read_sampler(evsel, evlist);
64 
65 	if (evsel == read_sampler)
66 		return;
67 
68 	term_types = evsel__config_term_mask(evsel);
69 	/*
70 	 * Disable sampling for all group members except those with explicit
71 	 * config terms or the leader. In the case of an AUX area event, the 2nd
72 	 * event in the group is the one that 'leads' the sampling.
73 	 */
74 	freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
75 	if ((term_types & freq_mask) == 0) {
76 		attr->freq           = 0;
77 		attr->sample_freq    = 0;
78 		attr->sample_period  = 0;
79 	}
80 	if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
81 		attr->write_backward = 0;
82 
83 	/*
84 	 * We don't get a sample for slave events, we make them when delivering
85 	 * the group leader sample. Set the slave event to follow the master
86 	 * sample_type to ease up reporting.
87 	 * An AUX area event also has sample_type requirements, so also include
88 	 * the sample type bits from the leader's sample_type to cover that
89 	 * case.
90 	 */
91 	attr->sample_type = read_sampler->core.attr.sample_type |
92 			    leader->core.attr.sample_type;
93 }
94 
evlist__config(struct evlist * evlist,struct record_opts * opts,struct callchain_param * callchain)95 void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
96 {
97 	struct evsel *evsel;
98 	bool use_sample_identifier = false;
99 	bool use_comm_exec;
100 	bool sample_id = opts->sample_id;
101 
102 	/*
103 	 * Set the evsel leader links before we configure attributes,
104 	 * since some might depend on this info.
105 	 */
106 	if (opts->group)
107 		evlist__set_leader(evlist);
108 
109 	if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
110 		opts->no_inherit = true;
111 
112 	use_comm_exec = perf_can_comm_exec();
113 
114 	evlist__for_each_entry(evlist, evsel) {
115 		evsel__config(evsel, opts, callchain);
116 		if (evsel->tracking && use_comm_exec)
117 			evsel->core.attr.comm_exec = 1;
118 	}
119 
120 	/* Configure leader sampling here now that the sample type is known */
121 	evlist__for_each_entry(evlist, evsel)
122 		evsel__config_leader_sampling(evsel, evlist);
123 
124 	if (opts->full_auxtrace) {
125 		/*
126 		 * Need to be able to synthesize and parse selected events with
127 		 * arbitrary sample types, which requires always being able to
128 		 * match the id.
129 		 */
130 		use_sample_identifier = perf_can_sample_identifier();
131 		sample_id = true;
132 	} else if (evlist->core.nr_entries > 1) {
133 		struct evsel *first = evlist__first(evlist);
134 
135 		evlist__for_each_entry(evlist, evsel) {
136 			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
137 				continue;
138 			use_sample_identifier = perf_can_sample_identifier();
139 			break;
140 		}
141 		sample_id = true;
142 	}
143 
144 	if (sample_id) {
145 		evlist__for_each_entry(evlist, evsel)
146 			evsel__set_sample_id(evsel, use_sample_identifier);
147 	}
148 
149 	evlist__set_id_pos(evlist);
150 }
151 
/*
 * Read the kernel-wide sampling rate cap
 * (/proc/sys/kernel/perf_event_max_sample_rate) into *rate.
 * Returns 0 on success, non-zero if the sysctl could not be read.
 */
static int get_max_rate(unsigned int *rate)
{
	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
}
156 
record_opts__config_freq(struct record_opts * opts)157 static int record_opts__config_freq(struct record_opts *opts)
158 {
159 	bool user_freq = opts->user_freq != UINT_MAX;
160 	bool user_interval = opts->user_interval != ULLONG_MAX;
161 	unsigned int max_rate;
162 
163 	if (user_interval && user_freq) {
164 		pr_err("cannot set frequency and period at the same time\n");
165 		return -1;
166 	}
167 
168 	if (user_interval)
169 		opts->default_interval = opts->user_interval;
170 	if (user_freq)
171 		opts->freq = opts->user_freq;
172 
173 	/*
174 	 * User specified count overrides default frequency.
175 	 */
176 	if (opts->default_interval)
177 		opts->freq = 0;
178 	else if (opts->freq) {
179 		opts->default_interval = opts->freq;
180 	} else {
181 		pr_err("frequency and count are zero, aborting\n");
182 		return -1;
183 	}
184 
185 	if (get_max_rate(&max_rate))
186 		return 0;
187 
188 	/*
189 	 * User specified frequency is over current maximum.
190 	 */
191 	if (user_freq && (max_rate < opts->freq)) {
192 		if (opts->strict_freq) {
193 			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
194 			       "       Please use -F freq option with a lower value or consider\n"
195 			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
196 			       max_rate);
197 			return -1;
198 		} else {
199 			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
200 				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
201 				   "         The kernel will lower it when perf's interrupts take too long.\n"
202 				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
203 				   max_rate, opts->freq, max_rate);
204 
205 			opts->freq = max_rate;
206 		}
207 	}
208 
209 	/*
210 	 * Default frequency is over current maximum.
211 	 */
212 	if (max_rate < opts->freq) {
213 		pr_warning("Lowering default frequency rate from %u to %u.\n"
214 			   "Please consider tweaking "
215 			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
216 			   opts->freq, max_rate);
217 		opts->freq = max_rate;
218 	}
219 
220 	return 0;
221 }
222 
/*
 * Validate and finalize record options. Currently this only resolves the
 * frequency/period settings; returns 0 on success, -1 on error.
 */
int record_opts__config(struct record_opts *opts)
{
	return record_opts__config_freq(opts);
}
227 
evlist__can_select_event(struct evlist * evlist,const char * str)228 bool evlist__can_select_event(struct evlist *evlist, const char *str)
229 {
230 	struct evlist *temp_evlist;
231 	struct evsel *evsel;
232 	int err, fd;
233 	struct perf_cpu cpu = { .cpu = 0 };
234 	bool ret = false;
235 	pid_t pid = -1;
236 
237 	temp_evlist = evlist__new();
238 	if (!temp_evlist)
239 		return false;
240 
241 	err = parse_events(temp_evlist, str, NULL);
242 	if (err)
243 		goto out_delete;
244 
245 	evsel = evlist__last(temp_evlist);
246 
247 	if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) {
248 		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
249 
250 		if (cpus)
251 			cpu =  perf_cpu_map__cpu(cpus, 0);
252 
253 		perf_cpu_map__put(cpus);
254 	} else {
255 		cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0);
256 	}
257 
258 	while (1) {
259 		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
260 					 perf_event_open_cloexec_flag());
261 		if (fd < 0) {
262 			if (pid == -1 && errno == EACCES) {
263 				pid = 0;
264 				continue;
265 			}
266 			goto out_delete;
267 		}
268 		break;
269 	}
270 	close(fd);
271 	ret = true;
272 
273 out_delete:
274 	evlist__delete(temp_evlist);
275 	return ret;
276 }
277 
/*
 * Option callback for -F/--freq: accepts either "max" (use the kernel's
 * perf_event_max_sample_rate) or a non-negative decimal frequency in Hz.
 * Stores the result in opts->user_freq; returns 0 on success, negative on
 * invalid input.
 */
int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
{
	unsigned int freq;
	struct record_opts *opts = opt->value;

	if (!str)
		return -EINVAL;

	if (strcasecmp(str, "max") == 0) {
		if (get_max_rate(&freq)) {
			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
			return -1;
		}
		pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
	} else {
		unsigned long val;
		char *endptr;

		/*
		 * Parse with strtoul() rather than atoi(): atoi() silently
		 * returns 0 for garbage, accepts negative values that wrap
		 * to huge unsigned frequencies, and has undefined behaviour
		 * on overflow. Reject anything that is not a plain
		 * non-negative decimal number that fits in unsigned int.
		 */
		errno = 0;
		val = strtoul(str, &endptr, 10);
		if (*str == '-' || endptr == str || *endptr != '\0' ||
		    errno != 0 || val > UINT_MAX) {
			pr_err("invalid frequency: '%s'\n", str);
			return -EINVAL;
		}
		freq = (unsigned int)val;
	}

	opts->user_freq = freq;
	return 0;
}
299