// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}
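
/*
 * rseq_gettid() above goes through syscall(2) directly: glibc only
 * gained a gettid() wrapper in version 2.30, so relying on the raw
 * system call keeps this test buildable against older C libraries.
 */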

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

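/*
 * Each supported architecture below provides RSEQ_INJECT_ASM(n): a
 * busy-wait loop of loop_cnt[n] iterations injected at well-known
 * points inside the rseq critical sections. Widening these windows
 * makes preemption, migration and signal delivery far more likely to
 * abort the sequence under test. The pattern is the same everywhere:
 * load the counter, skip ahead (label 333) if it is zero, otherwise
 * decrement (label 222) until it reaches zero.
 */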
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
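
/*
 * RSEQ_INJECT_C(n) is the C-level counterpart of RSEQ_INJECT_ASM(n):
 * it spins for loop_cnt[n] compiler barriers. When loop_cnt[n] is -1
 * (the "-1 to enable -m" convention of the -7/-8/-9 options), the spin
 * loop is skipped and instead the thread sleeps, yields and/or raises
 * SIGUSR1 every opt_modulo-th invocation, exercising the abort paths
 * from C code rather than from inline assembly.
 */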

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
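
/*
 * Typical usage of the lock pair above, as a minimal sketch
 * (my_counter is a hypothetical array of per-cpu data):
 *
 *	int cpu = rseq_this_cpu_lock(&lock);
 *	my_counter[cpu]++;	// cpu-local critical section
 *	rseq_percpu_unlock(&lock, cpu);
 *
 * The cpu returned by rseq_this_cpu_lock() identifies which per-cpu
 * lock was actually taken: the thread may migrate afterwards, so the
 * same index must be passed back to rseq_percpu_unlock().
 */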

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
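
/*
 * The push above is the canonical rseq compare-and-store pattern:
 * prepare the new node off to the side (node->next = expect), then let
 * rseq_cmpeqv_storev() publish it only if list->c[cpu].head still
 * holds the value we read and the thread is still on @cpu; any
 * preemption, migration or signal in between aborts the sequence and
 * the loop retries.
 */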

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
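
/*
 * rseq_cmpnev_storeoffp_load() above performs, within one critical
 * section: check that the head pointer is not NULL, load it into
 * *load, and store the word at @offset inside the pointed-to node
 * (i.e. head->next) back to the head pointer. A positive return means
 * the list was empty; a negative return means the sequence aborted
 * and must be retried.
 */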

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
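
/*
 * The push above relies on rseq_cmpeqv_trystorev_storev(): within one
 * critical section it re-checks that the offset is unchanged,
 * speculatively stores the node pointer into array[offset], then
 * commits by storing offset + 1 as the final step. With -M (opt_mb),
 * the _release variant additionally orders the speculative store
 * before the commit for observers on other cpus.
 */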

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
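
/*
 * The pop above uses rseq_cmpeqv_cmpeqv_storev(): both the offset and
 * the array slot being popped are re-validated before the offset is
 * decremented, so an interleaving that recycled the slot between our
 * reads and the commit is detected and retried rather than returning
 * a stale pointer.
 */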

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
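
/*
 * Same commit pattern as the pointer-based buffer, but the payload is
 * copied by value: rseq_cmpeqv_trymemcpy_storev() performs the memcpy
 * of the whole node speculatively inside the critical section, and the
 * offset update remains the single committing store. An aborted,
 * partially completed copy is harmless because nothing is published
 * until that final store; the helpers bound copylen to 4kB (see the
 * comment above).
 */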

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = rseq_cpu_start();

			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
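
/*
 * Note on the worker loop above: rseq_offset_deref_addv() dereferences
 * percpu_list_ptr to find the currently "active" list, indexes the
 * cpu-local entry at the given byte offset, and adds 1 to the head
 * node's data word, all within a single rseq critical section. Workers
 * therefore always increment through whichever list the manager has
 * most recently published, which is the invariant the manager thread
 * validates below.
 */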

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
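
/*
 * Minimal wrapper around membarrier(2), which has no glibc wrapper.
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ combined with
 * MEMBARRIER_CMD_FLAG_CPU restarts any rseq critical section currently
 * executing on @cpu_id, e.g. (hypothetical "cpu" variable):
 *
 *	if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
 *			   MEMBARRIER_CMD_FLAG_CPU, cpu) &&
 *	    errno != ENXIO)
 *		perror("sys_membarrier");
 *
 * The process must first register with
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, as test_membarrier()
 * does below.
 */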

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
				   MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
				   MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
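
/*
 * The swap protocol above only works because of the membarrier calls:
 * a worker may already have read the old percpu_list_ptr inside an
 * rseq critical section when the manager publishes the new list.
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ forces any such in-flight
 * critical section on the targeted cpu to abort and re-read the
 * pointer, after which the previously active list is quiescent on that
 * cpu and its values can safely be compared against the remembered
 * expect_a/expect_b.
 */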

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
		"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}