1 /* Benchmark malloc and free functions.
2    Copyright (C) 2013-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <errno.h>
20 #include <math.h>
21 #include <pthread.h>
22 #include <signal.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/time.h>
27 #include <sys/resource.h>
28 #include <unistd.h>
29 
30 #include "bench-timing.h"
31 #include "json-lib.h"
32 
33 /* Benchmark duration in seconds.  */
34 #define BENCHMARK_DURATION	10
35 #define RAND_SEED		88
36 
37 #ifndef NUM_THREADS
38 # define NUM_THREADS 1
39 #endif
40 
41 /* Maximum memory that can be allocated at any one time is:
42 
43    NUM_THREADS * WORKING_SET_SIZE * MAX_ALLOCATION_SIZE
44 
45    However due to the distribution of the random block sizes
46    the typical amount allocated will be much smaller.  */
47 #define WORKING_SET_SIZE	1024
48 
49 #define MIN_ALLOCATION_SIZE	4
50 #define MAX_ALLOCATION_SIZE	32768
51 
52 /* Get a random block size with an inverse square distribution.  */
53 static unsigned int
get_block_size(unsigned int rand_data)54 get_block_size (unsigned int rand_data)
55 {
56   /* Inverse square.  */
57   const float exponent = -2;
58   /* Minimum value of distribution.  */
59   const float dist_min = MIN_ALLOCATION_SIZE;
60   /* Maximum value of distribution.  */
61   const float dist_max = MAX_ALLOCATION_SIZE;
62 
63   float min_pow = powf (dist_min, exponent + 1);
64   float max_pow = powf (dist_max, exponent + 1);
65 
66   float r = (float) rand_data / RAND_MAX;
67 
68   return (unsigned int) powf ((max_pow - min_pow) * r + min_pow,
69 			      1 / (exponent + 1));
70 }
71 
72 #define NUM_BLOCK_SIZES	8000
73 #define NUM_OFFSETS	((WORKING_SET_SIZE) * 4)
74 
75 static unsigned int random_block_sizes[NUM_BLOCK_SIZES];
76 static unsigned int random_offsets[NUM_OFFSETS];
77 
78 static void
init_random_values(void)79 init_random_values (void)
80 {
81   for (size_t i = 0; i < NUM_BLOCK_SIZES; i++)
82     random_block_sizes[i] = get_block_size (rand ());
83 
84   for (size_t i = 0; i < NUM_OFFSETS; i++)
85     random_offsets[i] = rand () % WORKING_SET_SIZE;
86 }
87 
88 static unsigned int
get_random_block_size(unsigned int * state)89 get_random_block_size (unsigned int *state)
90 {
91   unsigned int idx = *state;
92 
93   if (idx >= NUM_BLOCK_SIZES - 1)
94     idx = 0;
95   else
96     idx++;
97 
98   *state = idx;
99 
100   return random_block_sizes[idx];
101 }
102 
103 static unsigned int
get_random_offset(unsigned int * state)104 get_random_offset (unsigned int *state)
105 {
106   unsigned int idx = *state;
107 
108   if (idx >= NUM_OFFSETS - 1)
109     idx = 0;
110   else
111     idx++;
112 
113   *state = idx;
114 
115   return random_offsets[idx];
116 }
117 
118 static volatile bool timeout;
119 
120 static void
alarm_handler(int signum)121 alarm_handler (int signum)
122 {
123   timeout = true;
124 }
125 
126 /* Allocate and free blocks in a random order.  */
127 static size_t
malloc_benchmark_loop(void ** ptr_arr)128 malloc_benchmark_loop (void **ptr_arr)
129 {
130   unsigned int offset_state = 0, block_state = 0;
131   size_t iters = 0;
132 
133   while (!timeout)
134     {
135       unsigned int next_idx = get_random_offset (&offset_state);
136       unsigned int next_block = get_random_block_size (&block_state);
137 
138       free (ptr_arr[next_idx]);
139 
140       ptr_arr[next_idx] = malloc (next_block);
141 
142       iters++;
143     }
144 
145   return iters;
146 }
147 
148 struct thread_args
149 {
150   size_t iters;
151   void **working_set;
152   timing_t elapsed;
153 };
154 
155 static void *
benchmark_thread(void * arg)156 benchmark_thread (void *arg)
157 {
158   struct thread_args *args = (struct thread_args *) arg;
159   size_t iters;
160   void *thread_set = args->working_set;
161   timing_t start, stop;
162 
163   TIMING_NOW (start);
164   iters = malloc_benchmark_loop (thread_set);
165   TIMING_NOW (stop);
166 
167   TIMING_DIFF (args->elapsed, start, stop);
168   args->iters = iters;
169 
170   return NULL;
171 }
172 
173 static timing_t
do_benchmark(size_t num_threads,size_t * iters)174 do_benchmark (size_t num_threads, size_t *iters)
175 {
176   timing_t elapsed = 0;
177 
178   if (num_threads == 1)
179     {
180       timing_t start, stop;
181       void *working_set[WORKING_SET_SIZE];
182 
183       memset (working_set, 0, sizeof (working_set));
184 
185       TIMING_NOW (start);
186       *iters = malloc_benchmark_loop (working_set);
187       TIMING_NOW (stop);
188 
189       TIMING_DIFF (elapsed, start, stop);
190     }
191   else
192     {
193       struct thread_args args[num_threads];
194       void *working_set[num_threads][WORKING_SET_SIZE];
195       pthread_t threads[num_threads];
196 
197       memset (working_set, 0, sizeof (working_set));
198 
199       *iters = 0;
200 
201       for (size_t i = 0; i < num_threads; i++)
202 	{
203 	  args[i].working_set = working_set[i];
204 	  pthread_create(&threads[i], NULL, benchmark_thread, &args[i]);
205 	}
206 
207       for (size_t i = 0; i < num_threads; i++)
208 	{
209 	  pthread_join(threads[i], NULL);
210 	  TIMING_ACCUM (elapsed, args[i].elapsed);
211 	  *iters += args[i].iters;
212 	}
213     }
214   return elapsed;
215 }
216 
usage(const char * name)217 static void usage(const char *name)
218 {
219   fprintf (stderr, "%s: <num_threads>\n", name);
220   exit (1);
221 }
222 
223 int
main(int argc,char ** argv)224 main (int argc, char **argv)
225 {
226   timing_t cur;
227   size_t iters = 0, num_threads = 1;
228   json_ctx_t json_ctx;
229   double d_total_s, d_total_i;
230   struct sigaction act;
231 
232   if (argc == 1)
233     num_threads = 1;
234   else if (argc == 2)
235     {
236       long ret;
237 
238       errno = 0;
239       ret = strtol(argv[1], NULL, 10);
240 
241       if (errno || ret == 0)
242 	usage(argv[0]);
243 
244       num_threads = ret;
245     }
246   else
247     usage(argv[0]);
248 
249   init_random_values ();
250 
251   json_init (&json_ctx, 0, stdout);
252 
253   json_document_begin (&json_ctx);
254 
255   json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
256 
257   json_attr_object_begin (&json_ctx, "functions");
258 
259   json_attr_object_begin (&json_ctx, "malloc");
260 
261   json_attr_object_begin (&json_ctx, "");
262 
263   memset (&act, 0, sizeof (act));
264   act.sa_handler = &alarm_handler;
265 
266   sigaction (SIGALRM, &act, NULL);
267 
268   alarm (BENCHMARK_DURATION);
269 
270   cur = do_benchmark (num_threads, &iters);
271 
272   struct rusage usage;
273   getrusage(RUSAGE_SELF, &usage);
274 
275   d_total_s = cur;
276   d_total_i = iters;
277 
278   json_attr_double (&json_ctx, "duration", d_total_s);
279   json_attr_double (&json_ctx, "iterations", d_total_i);
280   json_attr_double (&json_ctx, "time_per_iteration", d_total_s / d_total_i);
281   json_attr_double (&json_ctx, "max_rss", usage.ru_maxrss);
282 
283   json_attr_double (&json_ctx, "threads", num_threads);
284   json_attr_double (&json_ctx, "min_size", MIN_ALLOCATION_SIZE);
285   json_attr_double (&json_ctx, "max_size", MAX_ALLOCATION_SIZE);
286   json_attr_double (&json_ctx, "random_seed", RAND_SEED);
287 
288   json_attr_object_end (&json_ctx);
289 
290   json_attr_object_end (&json_ctx);
291 
292   json_attr_object_end (&json_ctx);
293 
294   json_document_end (&json_ctx);
295 
296   return 0;
297 }
298