1 /* Measure mutex_lock for different threads and critical sections.
2    Copyright (C) 2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #define TEST_MAIN
20 #define TEST_NAME "pthread-mutex-locks"
21 #define TIMEOUT (20 * 60)
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <math.h>
28 #include <pthread.h>
29 #include <sys/time.h>
30 #include <sys/sysinfo.h>
31 #include "bench-timing.h"
32 #include "json-lib.h"
33 
34 static pthread_mutex_t lock;
35 static pthread_mutexattr_t attr;
36 static pthread_barrier_t barrier;
37 
38 #define START_ITERS 1000
39 
40 #pragma GCC push_options
41 #pragma GCC optimize(1)
42 
/* Recursive Fibonacci-style filler computation.  For I <= 2 the result
   is 10 + I; for larger I it is the sum of the results for I - 1 and
   I - 2.  The function is marked noinline and contains an empty asm
   statement, presumably so the compiler keeps the recursive calls
   instead of folding them into a constant.  */
static int __attribute__ ((noinline))
fibonacci (int i)
{
  __asm__ ("");
  if (i <= 2)
    return 10 + i;
  return fibonacci (i - 1) + fibonacci (i - 2);
}
50 
/* One unit of filler work on private memory: copy fibonacci (4) bytes
   between two 512-byte buffers that live in this call's own stack
   frame.  The source buffer is not initialised and the destination is
   never read afterwards.  */
static void
do_filler (void)
{
  char dst[512], src[512];

  memcpy (dst, src, fibonacci (4));
}
58 
/* One unit of filler work on shared memory: copy fibonacci (4) bytes
   between two 512-byte buffers with static storage duration, so every
   caller operates on the same pair of buffers.  */
static void
do_filler_shared (void)
{
  static char dst[512], src[512];

  memcpy (dst, src, fibonacci (4));
}
66 
67 #pragma GCC pop_options
68 
69 #define UNIT_WORK_CRT do_filler_shared ()
70 #define UNIT_WORK_NON_CRT do_filler ()
71 
72 static inline void
critical_section(int length)73 critical_section (int length)
74 {
75   for (int i = length; i >= 0; i--)
76     UNIT_WORK_CRT;
77 }
78 
79 static inline void
non_critical_section(int length)80 non_critical_section (int length)
81 {
82   for (int i = length; i >= 0; i--)
83     UNIT_WORK_NON_CRT;
84 }
85 
86 typedef struct Worker_Params
87 {
88   long iters;
89   int crt_len;
90   int non_crt_len;
91   timing_t duration;
92 } Worker_Params;
93 
94 static void *
worker(void * v)95 worker (void *v)
96 {
97   timing_t start, stop;
98   Worker_Params *p = (Worker_Params *) v;
99   long iters = p->iters;
100   int crt_len = p->crt_len;
101   int non_crt_len = p->non_crt_len;
102 
103   pthread_barrier_wait (&barrier);
104   TIMING_NOW (start);
105   while (iters--)
106     {
107       pthread_mutex_lock (&lock);
108       critical_section (crt_len);
109       pthread_mutex_unlock (&lock);
110       non_critical_section (non_crt_len);
111     }
112   TIMING_NOW (stop);
113 
114   TIMING_DIFF (p->duration, start, stop);
115   return NULL;
116 }
117 
118 static double
do_one_test(int num_threads,int crt_len,int non_crt_len,long iters)119 do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
120 {
121   int i;
122   timing_t mean;
123   Worker_Params *p, params[num_threads];
124   pthread_t threads[num_threads];
125 
126   pthread_mutex_init (&lock, &attr);
127   pthread_barrier_init (&barrier, NULL, num_threads);
128 
129   for (i = 0; i < num_threads; i++)
130     {
131       p = &params[i];
132       p->iters = iters;
133       p->crt_len = crt_len;
134       p->non_crt_len = non_crt_len;
135       pthread_create (&threads[i], NULL, worker, (void *) p);
136     }
137   for (i = 0; i < num_threads; i++)
138     pthread_join (threads[i], NULL);
139 
140   pthread_mutex_destroy (&lock);
141   pthread_barrier_destroy (&barrier);
142 
143   mean = 0;
144   for (i = 0; i < num_threads; i++)
145     mean += params[i].duration;
146   mean /= num_threads;
147   return mean;
148 }
149 
150 #define RUN_COUNT 10
151 #define MIN_TEST_SEC 0.01
152 
153 static void
do_bench_one(const char * name,int num_threads,int crt_len,int non_crt_len,json_ctx_t * js)154 do_bench_one (const char *name, int num_threads, int crt_len, int non_crt_len,
155 	      json_ctx_t *js)
156 {
157   timing_t cur;
158   struct timeval ts, te;
159   double tsd, ted, td;
160   long iters, iters_limit, total_iters;
161   timing_t curs[RUN_COUNT + 2];
162   int i, j;
163   double mean, stdev;
164 
165   iters = START_ITERS;
166   iters_limit = LONG_MAX / 100;
167 
168   while (1)
169     {
170       gettimeofday (&ts, NULL);
171       cur = do_one_test (num_threads, crt_len, non_crt_len, iters);
172       gettimeofday (&te, NULL);
173       /* Make sure the test to run at least MIN_TEST_SEC.  */
174       tsd = ts.tv_sec + ts.tv_usec / 1000000.0;
175       ted = te.tv_sec + te.tv_usec / 1000000.0;
176       td = ted - tsd;
177       if (td >= MIN_TEST_SEC || iters >= iters_limit)
178 	break;
179 
180       iters *= 10;
181     }
182 
183   curs[0] = cur;
184   for (i = 1; i < RUN_COUNT + 2; i++)
185     curs[i] = do_one_test (num_threads, crt_len, non_crt_len, iters);
186 
187   /* Sort the results so we can discard the fastest and slowest
188      times as outliers.  */
189   for (i = 0; i < RUN_COUNT + 1; i++)
190     for (j = i + 1; j < RUN_COUNT + 2; j++)
191       if (curs[i] > curs[j])
192 	{
193 	  timing_t temp = curs[i];
194 	  curs[i] = curs[j];
195 	  curs[j] = temp;
196 	}
197 
198   /* Calculate mean and standard deviation.  */
199   mean = 0.0;
200   total_iters = iters * num_threads;
201   for (i = 1; i < RUN_COUNT + 1; i++)
202     mean += (double) curs[i] / (double) total_iters;
203   mean /= RUN_COUNT;
204 
205   stdev = 0.0;
206   for (i = 1; i < RUN_COUNT + 1; i++)
207     {
208       double s = (double) curs[i] / (double) total_iters - mean;
209       stdev += s * s;
210     }
211   stdev = sqrt (stdev / (RUN_COUNT - 1));
212 
213   char buf[256];
214   snprintf (buf, sizeof buf, "%s,non_crt_len=%d,crt_len=%d,threads=%d", name,
215 	    non_crt_len, crt_len, num_threads);
216 
217   json_attr_object_begin (js, buf);
218 
219   json_attr_double (js, "duration", (double) cur);
220   json_attr_double (js, "iterations", (double) total_iters);
221   json_attr_double (js, "mean", mean);
222   json_attr_double (js, "stdev", stdev);
223   json_attr_double (js, "min-outlier",
224 		    (double) curs[0] / (double) total_iters);
225   json_attr_double (js, "min", (double) curs[1] / (double) total_iters);
226   json_attr_double (js, "max",
227 		    (double) curs[RUN_COUNT] / (double) total_iters);
228   json_attr_double (js, "max-outlier",
229 		    (double) curs[RUN_COUNT + 1] / (double) total_iters);
230 
231   json_attr_object_end (js);
232 }
233 
234 #define TH_CONF_MAX 10
235 
236 int
do_bench(void)237 do_bench (void)
238 {
239   int rv = 0;
240   json_ctx_t json_ctx;
241   int i, j, k;
242   int th_num, th_conf, nprocs;
243   int threads[TH_CONF_MAX];
244   int crt_lens[] = { 0, 1, 2, 4, 8, 16, 32, 64, 128 };
245   int non_crt_lens[] = { 1, 32, 128 };
246   char name[128];
247 
248   json_init (&json_ctx, 2, stdout);
249   json_attr_object_begin (&json_ctx, "pthread_mutex_locks");
250 
251   /* The thread config begins from 1, and increases by 2x until nprocs.
252      We also wants to test over-saturation case (1.25*nprocs).  */
253   nprocs = get_nprocs ();
254   th_num = 1;
255   for (th_conf = 0; th_conf < (TH_CONF_MAX - 2) && th_num < nprocs; th_conf++)
256     {
257       threads[th_conf] = th_num;
258       th_num <<= 1;
259     }
260   threads[th_conf++] = nprocs;
261   threads[th_conf++] = nprocs + nprocs / 4;
262 
263   pthread_mutexattr_init (&attr);
264   pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
265   snprintf (name, sizeof name, "type=adaptive");
266 
267   for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
268     {
269       int non_crt_len = non_crt_lens[k];
270       for (j = 0; j < (sizeof (crt_lens) / sizeof (int)); j++)
271 	{
272 	  int crt_len = crt_lens[j];
273 	  for (i = 0; i < th_conf; i++)
274 	    {
275 	      th_num = threads[i];
276 	      do_bench_one (name, th_num, crt_len, non_crt_len, &json_ctx);
277 	    }
278 	}
279     }
280 
281   json_attr_object_end (&json_ctx);
282 
283   return rv;
284 }
285 
286 #define TEST_FUNCTION do_bench ()
287 
288 #include "../test-skeleton.c"
289