// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <api/fs/fs.h>
#include <linux/kernel.h>
#include "map_symbol.h"
#include "mem-events.h"
#include "debug.h"
#include "symbol.h"
#include "pmu.h"
#include "pmu-hybrid.h"

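/*
 * Default load-latency threshold, in core cycles, substituted into the
 * "ldlat-loads" event template below.  Callers may overwrite it before
 * the event name is first built (e.g. via a --ldlat command-line option).
 */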
unsigned int perf_mem_events__loads_ldlat = 30;

#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }

static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
	E(NULL,			NULL,				NULL),
};
#undef E

static char mem_loads_name[100];
static bool mem_loads_name__init;

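/*
 * Return the generic entry for event @i, or NULL if @i is out of range.
 * Declared __weak so that an architecture can substitute its own table
 * of mem events.
 */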
struct perf_mem_event * __weak perf_mem_events__ptr(int i)
{
	if (i >= PERF_MEM_EVENTS__MAX)
		return NULL;

	return &perf_mem_events[i];
}

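/*
 * Build the event string for event @i.  The load event's name is a
 * printf-style template, so it is formatted once with the current ldlat
 * value and the result cached in mem_loads_name.
 */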
char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused)
{
	struct perf_mem_event *e = perf_mem_events__ptr(i);

	if (!e)
		return NULL;

	if (i == PERF_MEM_EVENTS__LOAD) {
		if (!mem_loads_name__init) {
			mem_loads_name__init = true;
			scnprintf(mem_loads_name, sizeof(mem_loads_name),
				  e->name, perf_mem_events__loads_ldlat);
		}
		return mem_loads_name;
	}

	return (char *)e->name;
}

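/*
 * Generic stub: only architectures that pair mem-loads with an auxiliary
 * event override this to report true.
 */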
__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
{
	return false;
}

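/*
 * Parse a comma-separated list of mem event tags and mark each match for
 * recording.  Matching is by substring against the tags in the table
 * above, so parsing "loads,stores" enables both events, since strstr()
 * finds "loads" inside "ldlat-loads".
 */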
int perf_mem_events__parse(const char *str)
{
	char *tok, *saveptr = NULL;
	bool found = false;
	char *buf;
	int j;

	/* We need a writable copy of @str for strtok_r(). */
	buf = strdup(str);
	if (!buf)
		return -ENOMEM;

	tok = strtok_r(buf, ",", &saveptr);

	while (tok) {
		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			struct perf_mem_event *e = perf_mem_events__ptr(j);

			if (!e->tag)
				continue;

			if (strstr(e->tag, tok))
				e->record = found = true;
		}

		tok = strtok_r(NULL, ",", &saveptr);
	}

	free(buf);

	if (found)
		return 0;

	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
	return -1;
}

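/*
 * An event is considered supported if its sysfs directory exists, e.g.
 * "<mnt>/devices/cpu/events/mem-loads" relative to the sysfs mount.
 */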
static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
{
	char path[PATH_MAX];
	struct stat st;

	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
	return !stat(path, &st);
}

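/*
 * Probe sysfs for every table entry.  On hybrid systems an event counts
 * as supported when any of the hybrid PMUs (e.g. cpu_core/cpu_atom)
 * exposes it.
 */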
int perf_mem_events__init(void)
{
	const char *mnt = sysfs__mount();
	bool found = false;
	int j;

	if (!mnt)
		return -ENOENT;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);
		struct perf_pmu *pmu;
		char sysfs_name[100];

		/*
		 * If the event entry isn't valid, skip initialization
		 * and "e->supported" stays false.
		 */
		if (!e->tag)
			continue;

		if (!perf_pmu__has_hybrid()) {
			scnprintf(sysfs_name, sizeof(sysfs_name),
				  e->sysfs_name, "cpu");
			e->supported = perf_mem_event__supported(mnt, sysfs_name);
		} else {
			perf_pmu__for_each_hybrid_pmu(pmu) {
				scnprintf(sysfs_name, sizeof(sysfs_name),
					  e->sysfs_name, pmu->name);
				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
			}
		}

		if (e->supported)
			found = true;
	}

	return found ? 0 : -ENOENT;
}

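/* List the known mem events, flagging the ones sysfs says are available. */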
void perf_mem_events__list(void)
{
	int j;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);

		fprintf(stderr, "%-13s%-*s%s\n",
			e->tag ?: "",
			verbose > 0 ? 25 : 0,
			verbose > 0 ? perf_mem_events__name(j, NULL) : "",
			e->supported ? ": available" : "");
	}
}

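/* Print an error for each hybrid PMU on which event @idx is unsupported. */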
static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
						     int idx)
{
	const char *mnt = sysfs__mount();
	char sysfs_name[100];
	struct perf_pmu *pmu;

	perf_pmu__for_each_hybrid_pmu(pmu) {
		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
			  pmu->name);
		if (!perf_mem_event__supported(mnt, sysfs_name)) {
			pr_err("failed: event '%s' not supported\n",
			       perf_mem_events__name(idx, pmu->name));
		}
	}
}

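/*
 * Append "-e <event>" pairs to @rec_argv for every event marked by
 * perf_mem_events__parse().  With both events enabled and the defaults
 * above, a non-hybrid system gets "-e cpu/mem-loads,ldlat=30/P
 * -e cpu/mem-stores/P".  Strings duplicated for hybrid PMUs are also
 * stashed in @rec_tmp so the caller can free them later.
 */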
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
				 char **rec_tmp, int *tmp_nr)
{
	int i = *argv_nr, k = 0;
	struct perf_mem_event *e;
	struct perf_pmu *pmu;
	char *s;

	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		e = perf_mem_events__ptr(j);
		if (!e->record)
			continue;

		if (!perf_pmu__has_hybrid()) {
			if (!e->supported) {
				pr_err("failed: event '%s' not supported\n",
				       perf_mem_events__name(j, NULL));
				return -1;
			}

			rec_argv[i++] = "-e";
			rec_argv[i++] = perf_mem_events__name(j, NULL);
		} else {
			if (!e->supported) {
				perf_mem_events__print_unsupport_hybrid(e, j);
				return -1;
			}

			perf_pmu__for_each_hybrid_pmu(pmu) {
				rec_argv[i++] = "-e";
				s = perf_mem_events__name(j, pmu->name);
				if (s) {
					s = strdup(s);
					if (!s)
						return -1;

					rec_argv[i++] = s;
					rec_tmp[k++] = s;
				}
			}
		}
	}

	*argv_nr = i;
	*tmp_nr = k;
	return 0;
}

static const char * const tlb_access[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"L2",
	"Walker",
	"Fault",
};

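/*
 * Decode data_src.mem_dtlb into a human-readable string, e.g.
 * "L1 or L2 hit".  The HIT/MISS bits are masked out of the bit loop and
 * appended last.
 */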
int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t l = 0, i;
	u64 m = PERF_MEM_TLB_NA;
	u64 hit, miss;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info->data_src.mem_dtlb;

	hit = m & PERF_MEM_TLB_HIT;
	miss = m & PERF_MEM_TLB_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);

	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
	}
	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const mem_lvl[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"LFB",
	"L2",
	"L3",
	"Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O",
	"Uncached",
};

static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
	[PERF_MEM_LVLNUM_LFB] = "LFB",
	[PERF_MEM_LVLNUM_RAM] = "RAM",
	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
	[PERF_MEM_LVLNUM_NA] = "N/A",
};

static const char * const mem_hops[] = {
	"N/A",
	/*
	 * While printing, 'Remote' will be prepended to represent
	 * 'Remote core, same node' accesses, since the remote field
	 * must be set along with the mem_hops field.
	 */
	"core, same node",
	"node, same socket",
	"socket, same board",
	"board",
};

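/*
 * Decode data_src.mem_op into "N/A"/"LOAD"/"STORE"/"PFETCH"/"EXEC",
 * or "No" when none of the op bits is set.
 */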
static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	u64 op = PERF_MEM_OP_NA;
	int l;

	if (mem_info)
		op = mem_info->data_src.mem_op;

	if (op & PERF_MEM_OP_NA)
		l = scnprintf(out, sz, "N/A");
	else if (op & PERF_MEM_OP_LOAD)
		l = scnprintf(out, sz, "LOAD");
	else if (op & PERF_MEM_OP_STORE)
		l = scnprintf(out, sz, "STORE");
	else if (op & PERF_MEM_OP_PFETCH)
		l = scnprintf(out, sz, "PFETCH");
	else if (op & PERF_MEM_OP_EXEC)
		l = scnprintf(out, sz, "EXEC");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

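/*
 * Decode the memory hierarchy level of data_src.  Prefers the newer
 * mem_hops/mem_lvl_num encoding when present and falls back to the
 * legacy PERF_MEM_LVL bits, producing strings such as
 * "Remote core, same node hit".
 */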
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_LVL_NA;
	u64 hit, miss;
	int printed = 0;

	if (mem_info)
		m = mem_info->data_src.mem_lvl;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	hit = m & PERF_MEM_LVL_HIT;
	miss = m & PERF_MEM_LVL_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);

	if (mem_info && mem_info->data_src.mem_remote) {
		strcat(out, "Remote ");
		l += 7;
	}

	/*
	 * In case the mem_hops field is set, we can skip printing the
	 * data source via the PERF_MEM_LVL namespace.
	 */
	if (mem_info && mem_info->data_src.mem_hops) {
		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
	} else {
		for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
			if (!(m & 0x1))
				continue;
			if (printed++) {
				strcat(out, " or ");
				l += 4;
			}
			l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
		}
	}

	if (mem_info && mem_info->data_src.mem_lvl_num) {
		int lvl = mem_info->data_src.mem_lvl_num;

		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		if (mem_lvlnum[lvl])
			l += scnprintf(out + l, sz - l, "%s", mem_lvlnum[lvl]);
		else
			l += scnprintf(out + l, sz - l, "L%d", lvl);
	}

	if (l == 0)
		l += scnprintf(out + l, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const snoop_access[] = {
	"N/A",
	"None",
	"Hit",
	"Miss",
	"HitM",
};

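/*
 * Decode data_src.mem_snoop (plus the SNOOPX_FWD extension bit), e.g.
 * "HitM" or "Hit or Fwd".
 */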
int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_SNOOP_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info->data_src.mem_snoop;

	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
	}
	if (mem_info &&
	    (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "Fwd");
	}

	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");

	return l;
}

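/*
 * Decode data_src.mem_lock: "Yes" for a locked (atomic) access, "No"
 * otherwise, "N/A" when unknown.
 */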
int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	u64 mask = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		mask = mem_info->data_src.mem_lock;

	if (mask & PERF_MEM_LOCK_NA)
		l = scnprintf(out, sz, "N/A");
	else if (mask & PERF_MEM_LOCK_LOCKED)
		l = scnprintf(out, sz, "Yes");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t l = 0;
	u64 mask = PERF_MEM_BLK_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		mask = mem_info->data_src.mem_blk;

	if (!mask || (mask & PERF_MEM_BLK_NA)) {
		l += scnprintf(out + l, sz - l, " N/A");
		return l;
	}
	if (mask & PERF_MEM_BLK_DATA)
		l += scnprintf(out + l, sz - l, " Data");
	if (mask & PERF_MEM_BLK_ADDR)
		l += scnprintf(out + l, sz - l, " Addr");

	return l;
}

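/*
 * Render all data_src fields into one string for 'perf script', along
 * the lines of "|OP LOAD|LVL L1 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK  N/A".
 */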
int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	int i = 0;

	i += scnprintf(out, sz, "|OP ");
	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LVL ");
	i += perf_mem__lvl_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}

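/*
 * Classify one memory sample for 'perf c2c': bump the load/store, cache
 * level, DRAM and HITM counters matching @mi->data_src.  Returns -1 for
 * samples that cannot be charged (no data address, unparsable source,
 * or unresolved maps).
 */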
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
{
	union perf_mem_data_src *data_src = &mi->data_src;
	u64 daddr = mi->daddr.addr;
	u64 op = data_src->mem_op;
	u64 lvl = data_src->mem_lvl;
	u64 snoop = data_src->mem_snoop;
	u64 lock = data_src->mem_lock;
	u64 blk = data_src->mem_blk;
	/*
	 * Skylake might report an unknown remote level via this bit,
	 * so consider it when evaluating remote HITMs.
	 *
	 * On Power, the remote field can also denote cache accesses
	 * from another core of the same node.  Hence, set mrem only
	 * when HOPS is zero and the remote field is set.
	 */
	bool mrem = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_hitm++;	\
} while (0)

#define P(a, b) PERF_MEM_##a##_##b

	stats->nr_entries++;

	if (lock & P(LOCK, LOCKED)) stats->locks++;

	if (blk & P(BLK, DATA)) stats->blk_data++;
	if (blk & P(BLK, ADDR)) stats->blk_addr++;

	if (op & P(OP, LOAD)) {
		/* load */
		stats->load++;

		if (!daddr) {
			stats->ld_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
			if (lvl & P(LVL, IO))  stats->ld_io++;
			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
			if (lvl & P(LVL, L3 )) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_llchit++;
			}

			if (lvl & P(LVL, LOC_RAM)) {
				stats->lcl_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}

			if ((lvl & P(LVL, REM_RAM1)) ||
			    (lvl & P(LVL, REM_RAM2)) ||
			     mrem) {
				stats->rmt_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}
		}

		if ((lvl & P(LVL, REM_CCE1)) ||
		    (lvl & P(LVL, REM_CCE2)) ||
		     mrem) {
			if (snoop & P(SNOOP, HIT))
				stats->rmt_hit++;
			else if (snoop & P(SNOOP, HITM))
				HITM_INC(rmt_hitm);
		}

		if ((lvl & P(LVL, MISS)))
			stats->ld_miss++;

	} else if (op & P(OP, STORE)) {
		/* store */
		stats->store++;

		if (!daddr) {
			stats->st_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->st_uncache++;
			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
		}
		if (lvl & P(LVL, MISS))
			if (lvl & P(LVL, L1)) stats->st_l1miss++;
		if (lvl & P(LVL, NA))
			stats->st_na++;
	} else {
		/* unparsable data_src? */
		stats->noparse++;
		return -1;
	}

	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
		stats->nomap++;
		return -1;
	}

#undef P
#undef HITM_INC
	return err;
}

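/* Accumulate the counters of @add into @stats, e.g. when merging c2c entries. */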
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
{
	stats->nr_entries += add->nr_entries;

	stats->locks += add->locks;
	stats->store += add->store;
	stats->st_uncache += add->st_uncache;
	stats->st_noadrs += add->st_noadrs;
	stats->st_l1hit += add->st_l1hit;
	stats->st_l1miss += add->st_l1miss;
	stats->st_na += add->st_na;
	stats->load += add->load;
	stats->ld_excl += add->ld_excl;
	stats->ld_shared += add->ld_shared;
	stats->ld_uncache += add->ld_uncache;
	stats->ld_io += add->ld_io;
	stats->ld_miss += add->ld_miss;
	stats->ld_noadrs += add->ld_noadrs;
	stats->ld_fbhit += add->ld_fbhit;
	stats->ld_l1hit += add->ld_l1hit;
	stats->ld_l2hit += add->ld_l2hit;
	stats->ld_llchit += add->ld_llchit;
	stats->lcl_hitm += add->lcl_hitm;
	stats->rmt_hitm += add->rmt_hitm;
	stats->tot_hitm += add->tot_hitm;
	stats->rmt_hit += add->rmt_hit;
	stats->lcl_dram += add->lcl_dram;
	stats->rmt_dram += add->rmt_dram;
	stats->blk_data += add->blk_data;
	stats->blk_addr += add->blk_addr;
	stats->nomap += add->nomap;
	stats->noparse += add->noparse;
}