1# Copyright (C) 2013-2022 Free Software Foundation, Inc.
2# Copyright The GNU Toolchain Authors.
3# This file is part of the GNU C Library.
4
5# The GNU C Library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9
10# The GNU C Library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# Lesser General Public License for more details.
14
15# You should have received a copy of the GNU Lesser General Public
16# License along with the GNU C Library; if not, see
17# <https://www.gnu.org/licenses/>.
18
19# Makefile for benchmark tests.  The only useful target here is `bench`.
20# Add benchmark functions in alphabetical order.
21
# Identify this directory to the shared build machinery.
subdir := benchtests

# Build-wide configuration shared by all subdirectories.
include ../Makeconfig
25
# Math function benchmarks.  Keep the list sorted, one name per line, and
# keep the trailing "# bench-math" marker: it terminates the backslash
# continuation so that a line added after the list is never absorbed
# into it.
bench-math := \
  acos \
  acosh \
  asin \
  asinh \
  atan \
  atan2 \
  atanh \
  cbrt \
  cos \
  cosf \
  cosh \
  erf \
  erfc \
  exp \
  exp10 \
  exp10f \
  exp2 \
  exp2f \
  expf \
  expm1 \
  fmax \
  fmaxf \
  fmin \
  fminf \
  hypot \
  hypotf \
  ilogb \
  ilogbf \
  isfinite \
  isinf \
  isnan \
  j0 \
  j1 \
  lgamma \
  log \
  log10 \
  log1p \
  log2 \
  log2f \
  logb \
  logbf \
  logf \
  modf \
  pow \
  powf \
  rint \
  roundeven \
  roundevenf \
  sin \
  sincos \
  sincosf \
  sinf \
  sinh \
  sqrt \
  tan \
  tanh \
  tgamma \
  trunc \
  truncf \
  y0 \
  y1 \
# bench-math

# Extra benchmarks enabled when float96-fcts contains "yes".
ifneq (,$(filter yes,$(float96-fcts)))
bench-math += \
  cbrtl \
# bench-math
endif

# Extra benchmarks enabled when float128-fcts or float128-alias-fcts
# contains "yes".
ifneq (,$(filter yes,$(float128-fcts) $(float128-alias-fcts)))
bench-math += \
  expf128 \
  ilogbf128 \
  powf128 \
  sinf128 \
# bench-math
endif
103
# Threading benchmarks; these are linked against the thread library
# further down in this file.
bench-pthread := \
  pthread-locks \
  pthread-mutex-locks \
  pthread_once \
  thread_create \
# bench-pthread

# Bit-scanning benchmarks driven through the generic benchmark skeleton.
bench-string := \
  ffs \
  ffsll \
# bench-string
115
# Byte-string and memory function benchmarks.  Selected as a group with
# BENCHSET="string-benchset".
string-benchset := \
  bzero \
  bzero-large \
  bzero-walk \
  memccpy \
  memchr \
  memcmp \
  memcmpeq \
  memcpy \
  memcpy-large \
  memcpy-random \
  memcpy-walk \
  memmem \
  memmove \
  memmove-large \
  memmove-walk \
  mempcpy \
  memrchr \
  memset \
  memset-large \
  memset-walk \
  memset-zero \
  memset-zero-large \
  memset-zero-walk \
  rawmemchr \
  stpcpy \
  stpcpy_chk \
  stpncpy \
  strcasecmp \
  strcasestr \
  strcat \
  strchr \
  strchrnul \
  strcmp \
  strcoll \
  strcpy \
  strcpy_chk \
  strcspn \
  strlen \
  strncasecmp \
  strncat \
  strncmp \
  strncpy \
  strnlen \
  strpbrk \
  strrchr \
  strsep \
  strspn \
  strstr \
  strtok \
# string-benchset
168
# Locale-dependent (wide-character) benchmarks.  The set is empty by
# default and is only populated for native builds.
wcsmbs-benchset :=
ifeq (no,$(cross-compiling))
wcsmbs-benchset := \
  wcpcpy \
  wcpncpy \
  wcrtomb \
  wcscat \
  wcschr \
  wcschrnul \
  wcscmp \
  wcscpy \
  wcscspn \
  wcslen \
  wcsncat \
  wcsncmp \
  wcsncpy \
  wcsnlen \
  wcspbrk \
  wcsrchr \
  wcsspn \
  wmemchr \
  wmemcmp \
  wmemset \
# wcsmbs-benchset
endif

# Byte-string and wide-character sets combined.
string-benchset-all := $(string-benchset) $(wcsmbs-benchset)
198
# Native builds only: list the locales to generate and pull in the
# shared locale-generation rules.
ifeq (no,$(cross-compiling))
LOCALES := \
  ar_SA.UTF-8 \
  cs_CZ.UTF-8 \
  da_DK.UTF-8 \
  el_GR.UTF-8 \
  en_GB.UTF-8 \
  en_US.UTF-8 \
  es_ES.UTF-8 \
  fa_IR.UTF-8 \
  fr_FR.UTF-8 \
  he_IL.UTF-8 \
  hi_IN.UTF-8 \
  hu_HU.UTF-8 \
  is_IS.UTF-8 \
  it_IT.UTF-8 \
  ja_JP.UTF-8 \
  pl_PL.UTF-8 \
  pt_PT.UTF-8 \
  ru_RU.UTF-8 \
  si_LK.UTF-8 \
  sr_RS.UTF-8 \
  sv_SE.UTF-8 \
  tr_TR.UTF-8 \
  vi_VN.UTF-8 \
  zh_CN.UTF-8 \
# LOCALES
include ../gen-locales.mk
endif
229
# Hash function benchmarks.
hash-benchset := \
  dl-elf-hash \
  dl-new-hash \
  nss-hash \
# hash-benchset

# stdlib benchmarks.
stdlib-benchset := \
  arc4random \
  strtod \
# stdlib-benchset

# Single-member sets; each can still be selected by name through BENCHSET.
stdio-common-benchset := sprintf

math-benchset := math-inlines
244
# Benchmarks run by the bench-set target.  With no BENCHSET every
# *-benchset group is used; otherwise only the groups named in BENCHSET.
# BENCHSET is stripped before the emptiness test so that a whitespace-only
# value is treated as "not set", matching the BENCHSET validation check
# elsewhere in this file.
ifeq ($(strip ${BENCHSET}),)
benchset := $(string-benchset-all) $(stdlib-benchset) $(stdio-common-benchset) \
	    $(math-benchset) $(hash-benchset)
else
benchset := $(foreach B,$(filter %-benchset,${BENCHSET}), ${${B}})
endif
251
# Per-benchmark compiler flags.
# -fno-builtin: presumably keeps the compiler from expanding these calls
# inline so that the library implementation is what gets timed -- TODO
# confirm.
CFLAGS-bench-ffs.c += -fno-builtin
CFLAGS-bench-ffsll.c += -fno-builtin
CFLAGS-bench-fmax.c += -fno-builtin
CFLAGS-bench-fmaxf.c += -fno-builtin
CFLAGS-bench-fmin.c += -fno-builtin
CFLAGS-bench-fminf.c += -fno-builtin
CFLAGS-bench-roundeven.c += -fno-builtin
CFLAGS-bench-roundevenf.c += -fno-builtin
CFLAGS-bench-sqrt.c += -fno-builtin
CFLAGS-bench-trunc.c += -fno-builtin
CFLAGS-bench-truncf.c += -fno-builtin

# The classification benchmarks are built with -fsignaling-nans.
CFLAGS-bench-isfinite.c += -fsignaling-nans
CFLAGS-bench-isinf.c += -fsignaling-nans
CFLAGS-bench-isnan.c += -fsignaling-nans
266
# malloc benchmarks to build and run.  With no BENCHSET both are used;
# otherwise only the malloc-* names listed in BENCHSET.  BENCHSET is
# stripped first so a whitespace-only value counts as "not set",
# matching the BENCHSET validation check elsewhere in this file.
ifeq ($(strip ${BENCHSET}),)
bench-malloc := malloc-thread malloc-simple
else
bench-malloc := $(filter malloc-%,${BENCHSET})
endif
272
# Choose static or shared link settings for the benchmark programs.
# STATIC-BENCHTESTS=yes selects the static variants of the link command,
# libc, libm and the thread library; anything else selects the shared
# ones.
ifeq (yes,${STATIC-BENCHTESTS})
+link-benchtests = $(+link-static-tests)
link-libc-benchtests = $(link-libc-static)
libm-benchtests = $(common-objpfx)math/libm.a
thread-library-benchtests = $(static-thread-library)
else
+link-benchtests = $(+link-tests)
link-libc-benchtests = $(link-libc)
libm-benchtests = $(libm)
thread-library-benchtests = $(shared-thread-library)
endif
284
# Extra library prerequisites for individual benchmark binaries.
# Math benchmarks need libm.
$(addprefix $(objpfx)bench-,$(bench-math) $(math-benchset)): $(libm-benchtests)
# Thread and malloc benchmarks need the thread library.
$(addprefix $(objpfx)bench-,$(bench-pthread) $(bench-malloc)): \
  $(thread-library-benchtests)
# The two lock benchmarks additionally need libm.
$(addprefix $(objpfx)bench-,pthread-locks pthread-mutex-locks): $(libm-benchtests)
291
292
293
# Everything below builds and runs the benchmarks.  Benchmark parameters
# belong above this point, not below it.

# Benchmark programs must not run concurrently: doing so could disturb
# the timings they measure.
.NOTPARALLEL:

# Helper object linked into every benchmark binary.
bench-extra-objs = json-lib.o

extra-objs += $(bench-extra-objs)
others-extras = $(bench-extra-objs)
305
# Default benchmark duration: 1 second.  The value reaches the benchmark
# sources as the DURATION preprocessor macro.
ifndef BENCH_DURATION
BENCH_DURATION := 1
endif

CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC

# Timing source selection.  By default the architecture-specific high
# precision timing instructions are used.  USE_CLOCK_GETTIME switches to
# clock_gettime instead.  Otherwise, on x86, USE_RDTSCP selects RDTSCP
# rather than RDTSC; all x86 processors since 2010 support RDTSCP.
ifdef USE_CLOCK_GETTIME
CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
else ifdef USE_RDTSCP
CPPFLAGS-nonlib += -DUSE_RDTSCP
endif
324
# Option passed to each function benchmark: "-d" when DETAILED is set,
# nothing otherwise.
ifdef DETAILED
DETAILED_OPT := -d
else
DETAILED_OPT :=
endif
330
# Files that every generated benchmark source depends on.
bench-deps := bench-skeleton.c bench-timing.h Makefile

# Command prefix used by the recipes to run one benchmark.  It expects
# the shell variable "run" to hold the path of the binary, and is expanded
# lazily so that $* and the environment variables resolve per recipe.
run-bench = $(test-wrapper-env) $(run-program-env) $($*-ENV) \
	    $(test-via-rtld-prefix) $${run}

# Helper program whose output bench-func records as "timing_type".
timing-type := $(objpfx)bench-timing-type
extra-objs += bench-timing-type.o

include ../Rules
341
# Add any benchmarks listed in bench-libmvec to the math group.
bench-math += $(bench-libmvec)

# Function benchmarks run by the bench-func target.  With no BENCHSET
# every bench-* group is used; otherwise only the groups named in
# BENCHSET.  BENCHSET is stripped first so a whitespace-only value counts
# as "not set", matching the BENCHSET validation check elsewhere in this
# file.
ifeq ($(strip ${BENCHSET}),)
bench := $(bench-math) $(bench-pthread) $(bench-string)
else
bench := $(foreach B,$(filter bench-%,${BENCHSET}), ${${B}})
endif
349
# Paths of the benchmark binaries, and the matching objects registered in
# extra-objs.
# NB: binaries-bench and binaries-benchset use "=" rather than ":=" because
# sysdeps Makefiles may add more benches afterwards.
binaries-bench = $(addprefix $(objpfx)bench-,$(bench))
extra-objs += $(patsubst %,bench-%.o,$(bench))
binaries-benchset = $(addprefix $(objpfx)bench-,$(benchset))
extra-objs += $(patsubst %,bench-%.o,$(benchset))
binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
extra-objs += $(patsubst %,bench-%.o,$(bench-malloc))
358
# Mark every benchmark source as belonging to "nonlib", so that
# CPPFLAGS-nonlib and CFLAGS-nonlib are applied when compiling them.
cpp-srcs-left := $(addsuffix .c,$(binaries-benchset)) \
		 $(addsuffix .c,$(binaries-bench)) \
		 $(addsuffix .c,$(binaries-bench-malloc)) \
		 $(addsuffix .c,$(timing-type))
lib := nonlib
include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
365
# Remove the benchmark binaries and their object files.  Declared phony
# because it names a command, not a file: a stray file called
# "bench-clean" must not make the target look up to date.
.PHONY: bench-clean
bench-clean:
	rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
	rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
	rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc))
	rm -f $(timing-type) $(addsuffix .o,$(timing-type))
	rm -f $(addprefix $(objpfx),$(bench-extra-objs))
372
# Reject unknown names in a user-supplied BENCHSET: print the offending
# and the accepted values, then abort.
ifneq (,$(strip ${BENCHSET}))
VALIDBENCHSETNAMES := \
  bench-math \
  bench-pthread \
  bench-string \
  hash-benchset \
  malloc-simple \
  malloc-thread \
  math-benchset \
  stdio-common-benchset \
  stdlib-benchset \
  string-benchset \
  wcsmbs-benchset \
# VALIDBENCHSETNAMES

INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET})
ifneq (,${INVALIDBENCHSETNAMES})
$(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES})
$(info The valid ones are: ${VALIDBENCHSETNAMES})
$(error Invalid BENCHSET value)
endif
endif
396
# "bench" builds everything and then runs each benchmark group.  Both
# targets here name actions rather than files, so they are declared phony.
.PHONY: bench bench-build
bench: bench-build bench-set bench-func bench-malloc

# Target to only build the benchmark without running it.  We generate locales
# only if we're building natively.
ifeq (no,$(cross-compiling))
bench-build: $(gen-locales) $(timing-type) $(binaries-bench) \
	$(binaries-benchset) $(binaries-bench-malloc)
else
bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \
	$(binaries-bench-malloc)
endif
408
# Run every benchset binary in turn, writing each one's standard output
# to <binary>.out.  Declared phony because it names an action, not a file.
.PHONY: bench-set
bench-set: $(binaries-benchset)
	for run in $^; do \
	  echo "Running $${run}"; \
	  $(run-bench) > $${run}.out; \
	done
414
# Run the malloc benchmarks.  bench-malloc-thread is run once for each
# value in the first argument list, any other malloc benchmark once for
# each value in the second.  Output goes to <binary>-<argument>.out.
# Declared phony because it names an action, not a file.
.PHONY: bench-malloc
bench-malloc: $(binaries-bench-malloc)
	for run in $^; do \
	  echo "$${run}"; \
	  if [ `basename $${run}` = "bench-malloc-thread" ]; then \
	    args="1 8 16 32"; \
	  else \
	    args="8 16 32 64 128 256 512 1024 2048 4096"; \
	  fi; \
	  for arg in $${args}; do \
	    echo "Running $${run} $${arg}"; \
	    $(run-bench) $${arg} > $${run}-$${arg}.out; \
	  done; \
	done
430
# Build and execute the benchmark functions.  This target generates JSON
# formatted bench.out.  Each of the programs produce independent JSON output,
# so one could even execute them individually and process it using any JSON
# capable language or tool.
#
# Exit status 77 from a benchmark is reported as UNSUPPORTED, any other
# non-zero status as FAILED; neither contributes to bench.out.  The ","
# between entries is driven by whether an entry has already been written,
# not by the position of the binary in the list, so the JSON stays valid
# even when the first benchmark is skipped or fails.  A previous bench.out
# is kept as bench.out.old, and the new file is checked against the schema.
# Declared phony because it names an action, not a file.
.PHONY: bench-func
bench-func: $(binaries-bench)
	if [ -n '$^' ] ; then \
	{ timing_type=$$($(test-wrapper-env) \
			 $(run-program-env) \
			 $(test-via-rtld-prefix) \
			 $(timing-type)); \
	  echo "{\"timing_type\": \"$${timing_type}\","; \
	  echo " \"functions\": {"; \
	  emitted=; \
	  for run in $^; do \
	    op=$$($(run-bench) $(DETAILED_OPT)); \
	    ret=$$?; \
	    case "$${ret}" in \
	      77) \
		echo "UNSUPPORTED $${run}: $${op}" >&2; \
		;; \
	      0) \
		echo "Running $${run}" >&2; \
		if [ -n "$${emitted}" ]; then \
		  echo ","; \
		fi; \
		emitted=yes; \
		echo "$${op}"; \
		;; \
	      *) \
		echo "FAILED $${run}" >&2; \
		;; \
	    esac; \
	  done; \
	  echo; \
	  echo " }"; \
	  echo "}"; \
	  } > $(objpfx)bench.out-tmp; \
	  if [ -f $(objpfx)bench.out ]; then \
	    mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
	  fi; \
	  mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out; \
	  $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \
	  scripts/benchout.schema.json; \
	fi
473
# Extra linker flag used when bind-now is "yes".
ifeq (yes,$(bind-now))
link-bench-bind-now = -Wl,-z,now
endif

# Every program linked by the rule below.
bench-link-targets = $(timing-type) $(binaries-bench) \
	$(binaries-benchset) $(binaries-bench-malloc)

# Link each benchmark program from its own object plus json-lib.o, the
# libc pieces selected above and the startup objects.
$(bench-link-targets): %: %.o $(objpfx)json-lib.o \
  $(link-extra-libs-tests) \
  $(sort $(filter $(common-objpfx)lib%,$(link-libc-benchtests))) \
  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
	$(+link-benchtests)

# Target-specific addition; empty unless bind-now is enabled.
$(bench-link-targets): LDFLAGS += $(link-bench-bind-now)
488
# Generate the C source of a function benchmark from its "-inputs" file.
# The file named by <function>-INCLUDE, if set, is copied in first,
# followed by the output of scripts/bench.py.  The result is written to a
# temporary file and only renamed into place once generation succeeded.
# A failure to read the include file aborts the recipe, rather than
# silently producing a source without it.
$(objpfx)bench-%.c: %-inputs $(bench-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	    cat $($*-INCLUDE) || exit 1; \
	  fi; \
	  $(PYTHON) scripts/bench.py $(patsubst %-inputs,%,$<); } > $@-tmp
	mv -f $@-tmp $@
495