// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"

static struct {
	bool back2back;		/* consumer re-triggers each batch itself */
	int batch_cnt;		/* records emitted per triggered batch */
	bool sampled;		/* notify consumer only every Nth record */
	int sample_rate;	/* N for sampled notifications */
	int ringbuf_sz;		/* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz;		/* per-CPU size, in pages */
} args = {
	.back2back = false,
	.batch_cnt = 500,
	.sampled = false,
	.sample_rate = 500,
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
};

/* custom argp keys start at 2000, well above the printable ASCII range, so
 * they can never collide with single-character option keys
 */
enum {
	ARG_RB_BACK2BACK = 2000,
	ARG_RB_USE_OUTPUT = 2001,
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
};

static const struct argp_option opts[] = {
	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{},
};

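/* For orientation, hypothetical invocations via the benchmark runner (the
 * exact runner binary and option spelling are defined by the surrounding
 * bench harness, not this file):
 *
 *   ./bench rb-libbpf --rb-batch-cnt 1000
 *   ./bench rb-custom --rb-sampled --rb-sample-rate 100
 */
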
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case ARG_RB_BACK2BACK:
		args.back2back = true;
		break;
	case ARG_RB_USE_OUTPUT:
		args.ringbuf_use_output = true;
		break;
	case ARG_RB_BATCH_CNT:
		args.batch_cnt = strtol(arg, NULL, 10);
		if (args.batch_cnt < 0) {
			fprintf(stderr, "Invalid batch count.\n");
			argp_usage(state);
		}
		break;
	case ARG_RB_SAMPLED:
		args.sampled = true;
		break;
	case ARG_RB_SAMPLE_RATE:
		args.sample_rate = strtol(arg, NULL, 10);
		if (args.sample_rate < 0) {
			fprintf(stderr, "Invalid notification sample rate.\n");
			argp_usage(state);
		}
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

/* exported into benchmark runner */
const struct argp bench_ringbufs_argp = {
	.options = opts,
	.parser = parse_arg,
};

/* RINGBUF-LIBBPF benchmark */

static struct counter buf_hits;

static inline void bufs_trigger_batch(void)
{
	/* a single dummy getpgid() syscall triggers the attached BPF
	 * program, which emits one batch of batch_cnt records
	 */
	(void)syscall(__NR_getpgid);
}

static void bufs_validate(void)
{
	if (env.consumer_cnt != 1) {
		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
		exit(1);
	}

	if (args.back2back && env.producer_cnt > 1) {
		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
		exit(1);
	}
}

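/* In back-to-back mode the producer thread only emits one initial batch and
 * exits; from then on the consumer re-triggers the next batch itself after
 * draining the previous one (see the consumer loops below), so production
 * and consumption strictly alternate and never overlap.
 */
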
static void *bufs_sample_producer(void *input)
{
	if (args.back2back) {
		/* initial batch to get everything started */
		bufs_trigger_batch();
		return NULL;
	}

	while (true)
		bufs_trigger_batch();
	return NULL;
}

static struct ringbuf_libbpf_ctx {
	struct ringbuf_bench *skel;
	struct ring_buffer *ringbuf;
} ringbuf_libbpf_ctx;

static void ringbuf_libbpf_measure(struct bench_res *res)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
	struct ringbuf_bench *skel;

	setup_libbpf();

	skel = ringbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;
	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;

	if (args.sampled)
		/* record data + header take 16 bytes, so a notification is
		 * forced only once at least sample_rate records are pending
		 */
		skel->rodata->wakeup_data_size = args.sample_rate * 16;

	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);

	if (ringbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

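/* For orientation, a rough sketch of what the BPF side (ringbuf_bench.bpf.c)
 * is expected to do with the knobs set above -- an illustration based on the
 * rodata/bss fields this file references, not a verbatim copy of the actual
 * program:
 *
 *	SEC("fentry/<getpgid syscall>")
 *	int bench_ringbuf(void *ctx)
 *	{
 *		long *sample, flags;
 *		int i;
 *
 *		for (i = 0; i < batch_cnt; i++) {
 *			if (use_output) {
 *				// slower, copy-based API
 *				if (bpf_ringbuf_output(&ringbuf, &val, 8, flags))
 *					__sync_add_and_fetch(&dropped, 1);
 *			} else {
 *				sample = bpf_ringbuf_reserve(&ringbuf, 8, 0);
 *				if (!sample) {
 *					__sync_add_and_fetch(&dropped, 1);
 *					continue;
 *				}
 *				*sample = val;
 *				// flags would be BPF_RB_NO_WAKEUP or
 *				// BPF_RB_FORCE_WAKEUP derived from
 *				// wakeup_data_size vs pending data
 *				bpf_ringbuf_submit(sample, flags);
 *			}
 *		}
 *		return 0;
 *	}
 */
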
static int buf_process_sample(void *ctx, void *data, size_t len)
{
	atomic_inc(&buf_hits.value);
	return 0;
}

static void ringbuf_libbpf_setup(void)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
	struct bpf_link *link;

	ctx->skel = ringbuf_setup_skeleton();
	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
					buf_process_sample, NULL, NULL);
	if (!ctx->ringbuf) {
		fprintf(stderr, "failed to create ringbuf\n");
		exit(1);
	}

	/* the link is intentionally never destroyed: the benchmark runs
	 * until the process exits
	 */
	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

static void *ringbuf_libbpf_consumer(void *input)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* RINGBUF-CUSTOM benchmark */
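/* The "custom" ringbuf benchmark skips libbpf's ring_buffer abstraction and
 * consumes the BPF ringbuf directly: it mmap()'s the consumer page
 * read-write and the producer+data pages read-only, then polls the map FD
 * with epoll. struct ringbuf_custom below mirrors the relevant parts of
 * libbpf's internal ring state.
 */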
struct ringbuf_custom {
	__u64 *consumer_pos;	/* mmap'ed consumer position page */
	__u64 *producer_pos;	/* mmap'ed (read-only) producer position page */
	__u64 mask;		/* ringbuf_sz - 1, for wrapping offsets */
	void *data;		/* start of double-mapped data pages */
	int map_fd;
};

static struct ringbuf_custom_ctx {
	struct ringbuf_bench *skel;
	struct ringbuf_custom ringbuf;
	int epoll_fd;
	struct epoll_event event;
} ringbuf_custom_ctx;

static void ringbuf_custom_measure(struct bench_res *res)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static void ringbuf_custom_setup(void)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	const size_t page_size = getpagesize();
	struct bpf_link *link;
	struct ringbuf_custom *r;
	void *tmp;
	int err;

	ctx->skel = ringbuf_setup_skeleton();

	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (ctx->epoll_fd < 0) {
		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
		exit(1);
	}

	r = &ctx->ringbuf;
	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	r->mask = args.ringbuf_sz - 1;

	/* Map writable consumer page */
	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   r->map_fd, 0);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
		exit(1);
	}
	r->consumer_pos = tmp;

	/* Map read-only producer page and data pages. The kernel maps the
	 * data area twice back-to-back, hence 2 * ringbuf_sz here: records
	 * that wrap around the end of the buffer can still be read as one
	 * contiguous chunk.
	 */
	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
		   r->map_fd, page_size);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
		exit(1);
	}
	r->producer_pos = tmp;
	r->data = tmp + page_size;

	ctx->event.events = EPOLLIN;
	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
	if (err < 0) {
		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
		exit(1);
	}

	/* as above, the link is intentionally leaked */
	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

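/* Each ringbuf record is prefixed by an 8-byte header whose first 32 bits
 * hold the payload length; the top two bits of that length are flags: BUSY
 * (record reserved but not yet committed) and DISCARD (record committed but
 * to be skipped). Payloads are padded so that records stay 8-byte aligned.
 */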
#define RINGBUF_BUSY_BIT (1 << 31)
#define RINGBUF_DISCARD_BIT (1 << 30)
#define RINGBUF_META_LEN 8

static inline int roundup_len(__u32 len)
{
	/* clear out top 2 bits (BUSY and DISCARD flags) */
	len <<= 2;
	len >>= 2;
	/* add length prefix (record header) */
	len += RINGBUF_META_LEN;
	/* round up to 8 byte alignment; e.g. an 8-byte payload yields
	 * 8 + 8 = 16 bytes total
	 */
	return (len + 7) / 8 * 8;
}

static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
{
	unsigned long cons_pos, prod_pos;
	int *len_ptr, len;
	bool got_new_data;

	cons_pos = smp_load_acquire(r->consumer_pos);
	while (true) {
		got_new_data = false;
		/* load-acquire pairs with the kernel's store-release of the
		 * producer position, making committed records visible
		 */
		prod_pos = smp_load_acquire(r->producer_pos);
		while (cons_pos < prod_pos) {
			len_ptr = r->data + (cons_pos & r->mask);
			len = smp_load_acquire(len_ptr);

			/* sample not committed yet, bail out for now */
			if (len & RINGBUF_BUSY_BIT)
				return;

			got_new_data = true;
			cons_pos += roundup_len(len);

			atomic_inc(&buf_hits.value);
		}
		if (got_new_data)
			/* publish the new consumer position to the kernel */
			smp_store_release(r->consumer_pos, cons_pos);
		else
			break;
	}
}

static void *ringbuf_custom_consumer(void *input)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	int cnt;

	do {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
		if (cnt > 0)
			ringbuf_custom_process_ring(&ctx->ringbuf);
	} while (cnt >= 0);
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-LIBBPF benchmark */
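/* Unlike the shared BPF ringbuf, the perf buffer is per-CPU: each CPU gets
 * its own buffer of perfbuf_sz pages, and notification sampling is
 * implemented with the perf_event_attr sample_period/wakeup_events knobs
 * rather than with BPF-side wakeup flags.
 */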
static struct perfbuf_libbpf_ctx {
	struct perfbuf_bench *skel;
	struct perf_buffer *perfbuf;
} perfbuf_libbpf_ctx;

static void perfbuf_measure(struct bench_res *res)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
	struct perfbuf_bench *skel;

	setup_libbpf();

	skel = perfbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;

	if (perfbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void *input_ctx, int cpu,
			   struct perf_event_header *e)
{
	switch (e->type) {
	case PERF_RECORD_SAMPLE:
		atomic_inc(&buf_hits.value);
		break;
	case PERF_RECORD_LOST:
		/* lost-record notifications are ignored here; drops are
		 * counted on the BPF side instead
		 */
		break;
	default:
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static void perfbuf_libbpf_setup(void)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_event_attr attr;
	struct bpf_link *link;

	ctx->skel = perfbuf_setup_skeleton();

	memset(&attr, 0, sizeof(attr));
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	/* notify only every Nth sample */
	if (args.sampled) {
		attr.sample_period = args.sample_rate;
		attr.wakeup_events = args.sample_rate;
	} else {
		attr.sample_period = 1;
		attr.wakeup_events = 1;
	}

	if (args.sample_rate > args.batch_cnt) {
		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
			args.sample_rate, args.batch_cnt);
		exit(1);
	}

	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
					    args.perfbuf_sz, &attr,
					    perfbuf_process_sample_raw, NULL, NULL);
	if (!ctx->perfbuf) {
		fprintf(stderr, "failed to create perfbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

static void *perfbuf_libbpf_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "perfbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-CUSTOM benchmark */

/* copies of internal libbpf definitions; these must be kept in sync with
 * libbpf's private struct layout, otherwise the custom consumer below will
 * read garbage
 */
struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* epoll FD used to poll CPU buffers */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

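/* The custom perfbuf consumer reuses perfbuf_libbpf_setup() but bypasses
 * perf_buffer__poll(): it walks each CPU's mmap'ed perf ring directly,
 * using the ring_buffer_read_head()/ring_buffer_write_tail() barriers from
 * linux/ring_buffer.h (hence the asm/barrier.h include at the top) to
 * synchronize with the kernel-side producer.
 */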
static void *perfbuf_custom_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_buffer *pb = ctx->perfbuf;
	struct perf_cpu_buf *cpu_buf;
	struct perf_event_mmap_page *header;
	size_t mmap_mask = pb->mmap_size - 1;
	struct perf_event_header *ehdr;
	__u64 data_head, data_tail;
	size_t ehdr_size;
	void *base;
	int i, cnt;

	while (true) {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
		if (cnt <= 0) {
			fprintf(stderr, "perf epoll failed: %d\n", -errno);
			exit(1);
		}

		for (i = 0; i < cnt; ++i) {
			cpu_buf = pb->events[i].data.ptr;
			header = cpu_buf->base;
			base = ((void *)header) + pb->page_size;

			data_head = ring_buffer_read_head(header);
			data_tail = header->data_tail;
			while (data_head != data_tail) {
				/* records are 8-byte aligned, so the header
				 * itself never straddles the ring boundary;
				 * the payload might, but it's never read
				 * here, as only samples are being counted
				 */
				ehdr = base + (data_tail & mmap_mask);
				ehdr_size = ehdr->size;

				if (ehdr->type == PERF_RECORD_SAMPLE)
					atomic_inc(&buf_hits.value);

				data_tail += ehdr_size;
			}
			ring_buffer_write_tail(header, data_tail);
		}
	}
	return NULL;
}

const struct bench bench_rb_libbpf = {
	.name = "rb-libbpf",
	.validate = bufs_validate,
	.setup = ringbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_libbpf_consumer,
	.measure = ringbuf_libbpf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_rb_custom = {
	.name = "rb-custom",
	.validate = bufs_validate,
	.setup = ringbuf_custom_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_custom_consumer,
	.measure = ringbuf_custom_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_libbpf = {
	.name = "pb-libbpf",
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_libbpf_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_custom = {
	.name = "pb-custom",
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_custom_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};