1 /* Measure memmove function combined throughput for different alignments.
2    Copyright (C) 2017-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
/* This microbenchmark measures the throughput of memmove for sizes from
   START_SIZE bytes to 32MiB, doubling every iteration; each size is also
   run one byte larger, so that the walking pointers lose their alignment
   and misaligned accesses are measured as well.  Within a run the
   destination walks forward across its buffer and the source walks
   backward across its buffer, one copy length per call.  The idea is to
   avoid caching effects by copying different data on every call, far
   enough from each other and walking in different directions, so that we
   can measure prefetcher efficiency (software or hardware) more closely
   than with a loop copying the same data over and over, which eventually
   only gives us L1 cache performance.  */
29 
30 #ifndef MEMMOVE_RESULT
31 # define MEMMOVE_RESULT(dst, len) dst
32 # define START_SIZE 128
33 # define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
34 # define TEST_MAIN
35 # define TEST_NAME "memmove"
36 # define TIMEOUT (20 * 60)
37 # include "bench-string.h"
38 
39 #define NO_OVERLAP 0
40 #define PARTIAL_OVERLAP 1
41 #define COMPLETE_OVERLAP 2
42 
43 IMPL (memmove, 1)
44 #endif
45 
46 #include "json-lib.h"
47 
48 typedef char *(*proto_t) (char *, const char *, size_t);
49 
50 static void
do_one_test(json_ctx_t * json_ctx,impl_t * impl,char * dst,char * src,size_t len)51 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
52 	     size_t len)
53 {
54   size_t i = 0;
55   timing_t start, stop, cur;
56 
57   char *dst_end = dst + MIN_PAGE_SIZE - len;
58   char *src_end = src + MIN_PAGE_SIZE - len;
59 
60   TIMING_NOW (start);
61   /* Copy the entire buffer backwards, LEN at a time.  */
62   for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++)
63     CALL (impl, dst, src_end, len);
64   TIMING_NOW (stop);
65 
66   TIMING_DIFF (cur, start, stop);
67 
68   /* Get time taken per function call.  */
69   json_element_double (json_ctx, (double) cur / i);
70 }
71 
72 static void
do_test(json_ctx_t * json_ctx,size_t len,int overlap,int both_ways)73 do_test (json_ctx_t *json_ctx, size_t len, int overlap, int both_ways)
74 {
75   char *s1, *s2, *tmp;
76   size_t repeats;
77 
78   s1 = (char *) (buf1);
79   s2 = (char *) (buf2);
80   if (overlap != NO_OVERLAP)
81     s2 = s1;
82   if (overlap == PARTIAL_OVERLAP)
83     s2 += len / 2;
84 
85   for (repeats = both_ways ? 2 : 1; repeats; --repeats)
86     {
87       json_element_object_begin (json_ctx);
88       json_attr_uint (json_ctx, "length", (double) len);
89       json_attr_string(json_ctx, "overlap",
90                        overlap == NO_OVERLAP        ? "none"
91                        : overlap == PARTIAL_OVERLAP ? "partial"
92                                                     : "complete");
93       json_attr_uint (json_ctx, "dst > src", (double) (s2 > s1));
94       json_array_begin (json_ctx, "timings");
95 
96 
97       FOR_EACH_IMPL (impl, 0)
98         do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
99 
100       json_array_end (json_ctx);
101       json_element_object_end (json_ctx);
102 
103       tmp = s1;
104       s1 = s2;
105       s2 = tmp;
106     }
107 }
108 
109 int
test_main(void)110 test_main (void)
111 {
112   json_ctx_t json_ctx;
113 
114   test_init ();
115 
116   json_init (&json_ctx, 0, stdout);
117 
118   json_document_begin (&json_ctx);
119   json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
120 
121   json_attr_object_begin (&json_ctx, "functions");
122   json_attr_object_begin (&json_ctx, "memmove");
123   json_attr_string (&json_ctx, "bench-variant", "walk");
124 
125   json_array_begin (&json_ctx, "ifuncs");
126   FOR_EACH_IMPL (impl, 0)
127     json_element_string (&json_ctx, impl->name);
128   json_array_end (&json_ctx);
129 
130   json_array_begin (&json_ctx, "results");
131   /* Non-overlapping buffers.  */
132   for (size_t i = START_SIZE; i <= MIN_PAGE_SIZE; i <<= 1)
133     {
134       do_test (&json_ctx, i, NO_OVERLAP, 1);
135       do_test (&json_ctx, i + 1, NO_OVERLAP, 1);
136     }
137 
138   /* Partially-overlapping buffers.  */
139   for (size_t i = START_SIZE; i <= MIN_PAGE_SIZE / 2; i <<= 1)
140     {
141       do_test (&json_ctx, i, PARTIAL_OVERLAP, 1);
142       do_test (&json_ctx, i + 1, PARTIAL_OVERLAP, 1);
143     }
144 
145   /* Complete-overlapping buffers.  */
146   for (size_t i = START_SIZE; i <= MIN_PAGE_SIZE; i <<= 1)
147     {
148       do_test (&json_ctx, i, COMPLETE_OVERLAP, 0);
149       do_test (&json_ctx, i + 1, COMPLETE_OVERLAP, 0);
150     }
151 
152   json_array_end (&json_ctx);
153   json_attr_object_end (&json_ctx);
154   json_attr_object_end (&json_ctx);
155   json_document_end (&json_ctx);
156 
157   return ret;
158 }
159 
160 #include <support/test-driver.c>
161