/* _memcopy.c -- subroutines for memory copy functions.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...!  */

#include <stddef.h>
#include <memcopy.h>

24 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
25    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
26    Both SRCP and DSTP should be aligned for memory operations on `op_t's.  */
27 
28 #ifndef WORDCOPY_FWD_ALIGNED
29 # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned
30 #endif
31 
32 void
WORDCOPY_FWD_ALIGNED(long int dstp,long int srcp,size_t len)33 WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len)
34 {
35   op_t a0, a1;
36 
37   switch (len % 8)
38     {
39     case 2:
40       a0 = ((op_t *) srcp)[0];
41       srcp -= 6 * OPSIZ;
42       dstp -= 7 * OPSIZ;
43       len += 6;
44       goto do1;
45     case 3:
46       a1 = ((op_t *) srcp)[0];
47       srcp -= 5 * OPSIZ;
48       dstp -= 6 * OPSIZ;
49       len += 5;
50       goto do2;
51     case 4:
52       a0 = ((op_t *) srcp)[0];
53       srcp -= 4 * OPSIZ;
54       dstp -= 5 * OPSIZ;
55       len += 4;
56       goto do3;
57     case 5:
58       a1 = ((op_t *) srcp)[0];
59       srcp -= 3 * OPSIZ;
60       dstp -= 4 * OPSIZ;
61       len += 3;
62       goto do4;
63     case 6:
64       a0 = ((op_t *) srcp)[0];
65       srcp -= 2 * OPSIZ;
66       dstp -= 3 * OPSIZ;
67       len += 2;
68       goto do5;
69     case 7:
70       a1 = ((op_t *) srcp)[0];
71       srcp -= 1 * OPSIZ;
72       dstp -= 2 * OPSIZ;
73       len += 1;
74       goto do6;
75 
76     case 0:
77       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
78 	return;
79       a0 = ((op_t *) srcp)[0];
80       srcp -= 0 * OPSIZ;
81       dstp -= 1 * OPSIZ;
82       goto do7;
83     case 1:
84       a1 = ((op_t *) srcp)[0];
85       srcp -=-1 * OPSIZ;
86       dstp -= 0 * OPSIZ;
87       len -= 1;
88       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
89 	goto do0;
90       goto do8;			/* No-op.  */
91     }
92 
93   do
94     {
95     do8:
96       a0 = ((op_t *) srcp)[0];
97       ((op_t *) dstp)[0] = a1;
98     do7:
99       a1 = ((op_t *) srcp)[1];
100       ((op_t *) dstp)[1] = a0;
101     do6:
102       a0 = ((op_t *) srcp)[2];
103       ((op_t *) dstp)[2] = a1;
104     do5:
105       a1 = ((op_t *) srcp)[3];
106       ((op_t *) dstp)[3] = a0;
107     do4:
108       a0 = ((op_t *) srcp)[4];
109       ((op_t *) dstp)[4] = a1;
110     do3:
111       a1 = ((op_t *) srcp)[5];
112       ((op_t *) dstp)[5] = a0;
113     do2:
114       a0 = ((op_t *) srcp)[6];
115       ((op_t *) dstp)[6] = a1;
116     do1:
117       a1 = ((op_t *) srcp)[7];
118       ((op_t *) dstp)[7] = a0;
119 
120       srcp += 8 * OPSIZ;
121       dstp += 8 * OPSIZ;
122       len -= 8;
123     }
124   while (len != 0);
125 
126   /* This is the right position for do0.  Please don't move
127      it into the loop.  */
128  do0:
129   ((op_t *) dstp)[0] = a1;
130 }
131 
132 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
133    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
134    DSTP should be aligned for memory operations on `op_t's, but SRCP must
135    *not* be aligned.  */
136 
137 #ifndef WORDCOPY_FWD_DEST_ALIGNED
138 # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned
139 #endif
140 
141 void
WORDCOPY_FWD_DEST_ALIGNED(long int dstp,long int srcp,size_t len)142 WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
143 {
144   op_t a0, a1, a2, a3;
145   int sh_1, sh_2;
146 
147   /* Calculate how to shift a word read at the memory operation
148      aligned srcp to make it aligned for copy.  */
149 
150   sh_1 = 8 * (srcp % OPSIZ);
151   sh_2 = 8 * OPSIZ - sh_1;
152 
153   /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
154      it points in the middle of.  */
155   srcp &= -OPSIZ;
156 
157   switch (len % 4)
158     {
159     case 2:
160       a1 = ((op_t *) srcp)[0];
161       a2 = ((op_t *) srcp)[1];
162       srcp -= 1 * OPSIZ;
163       dstp -= 3 * OPSIZ;
164       len += 2;
165       goto do1;
166     case 3:
167       a0 = ((op_t *) srcp)[0];
168       a1 = ((op_t *) srcp)[1];
169       srcp -= 0 * OPSIZ;
170       dstp -= 2 * OPSIZ;
171       len += 1;
172       goto do2;
173     case 0:
174       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
175 	return;
176       a3 = ((op_t *) srcp)[0];
177       a0 = ((op_t *) srcp)[1];
178       srcp -=-1 * OPSIZ;
179       dstp -= 1 * OPSIZ;
180       len += 0;
181       goto do3;
182     case 1:
183       a2 = ((op_t *) srcp)[0];
184       a3 = ((op_t *) srcp)[1];
185       srcp -=-2 * OPSIZ;
186       dstp -= 0 * OPSIZ;
187       len -= 1;
188       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
189 	goto do0;
190       goto do4;			/* No-op.  */
191     }
192 
193   do
194     {
195     do4:
196       a0 = ((op_t *) srcp)[0];
197       ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
198     do3:
199       a1 = ((op_t *) srcp)[1];
200       ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
201     do2:
202       a2 = ((op_t *) srcp)[2];
203       ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
204     do1:
205       a3 = ((op_t *) srcp)[3];
206       ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
207 
208       srcp += 4 * OPSIZ;
209       dstp += 4 * OPSIZ;
210       len -= 4;
211     }
212   while (len != 0);
213 
214   /* This is the right position for do0.  Please don't move
215      it into the loop.  */
216  do0:
217   ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
218 }
219 
220 /* _wordcopy_bwd_aligned -- Copy block finishing right before
221    SRCP to block finishing right before DSTP with LEN `op_t' words
222    (not LEN bytes!).  Both SRCP and DSTP should be aligned for memory
223    operations on `op_t's.  */
224 
225 #ifndef WORDCOPY_BWD_ALIGNED
226 # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned
227 #endif
228 
229 void
WORDCOPY_BWD_ALIGNED(long int dstp,long int srcp,size_t len)230 WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len)
231 {
232   op_t a0, a1;
233 
234   switch (len % 8)
235     {
236     case 2:
237       srcp -= 2 * OPSIZ;
238       dstp -= 1 * OPSIZ;
239       a0 = ((op_t *) srcp)[1];
240       len += 6;
241       goto do1;
242     case 3:
243       srcp -= 3 * OPSIZ;
244       dstp -= 2 * OPSIZ;
245       a1 = ((op_t *) srcp)[2];
246       len += 5;
247       goto do2;
248     case 4:
249       srcp -= 4 * OPSIZ;
250       dstp -= 3 * OPSIZ;
251       a0 = ((op_t *) srcp)[3];
252       len += 4;
253       goto do3;
254     case 5:
255       srcp -= 5 * OPSIZ;
256       dstp -= 4 * OPSIZ;
257       a1 = ((op_t *) srcp)[4];
258       len += 3;
259       goto do4;
260     case 6:
261       srcp -= 6 * OPSIZ;
262       dstp -= 5 * OPSIZ;
263       a0 = ((op_t *) srcp)[5];
264       len += 2;
265       goto do5;
266     case 7:
267       srcp -= 7 * OPSIZ;
268       dstp -= 6 * OPSIZ;
269       a1 = ((op_t *) srcp)[6];
270       len += 1;
271       goto do6;
272 
273     case 0:
274       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
275 	return;
276       srcp -= 8 * OPSIZ;
277       dstp -= 7 * OPSIZ;
278       a0 = ((op_t *) srcp)[7];
279       goto do7;
280     case 1:
281       srcp -= 9 * OPSIZ;
282       dstp -= 8 * OPSIZ;
283       a1 = ((op_t *) srcp)[8];
284       len -= 1;
285       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
286 	goto do0;
287       goto do8;			/* No-op.  */
288     }
289 
290   do
291     {
292     do8:
293       a0 = ((op_t *) srcp)[7];
294       ((op_t *) dstp)[7] = a1;
295     do7:
296       a1 = ((op_t *) srcp)[6];
297       ((op_t *) dstp)[6] = a0;
298     do6:
299       a0 = ((op_t *) srcp)[5];
300       ((op_t *) dstp)[5] = a1;
301     do5:
302       a1 = ((op_t *) srcp)[4];
303       ((op_t *) dstp)[4] = a0;
304     do4:
305       a0 = ((op_t *) srcp)[3];
306       ((op_t *) dstp)[3] = a1;
307     do3:
308       a1 = ((op_t *) srcp)[2];
309       ((op_t *) dstp)[2] = a0;
310     do2:
311       a0 = ((op_t *) srcp)[1];
312       ((op_t *) dstp)[1] = a1;
313     do1:
314       a1 = ((op_t *) srcp)[0];
315       ((op_t *) dstp)[0] = a0;
316 
317       srcp -= 8 * OPSIZ;
318       dstp -= 8 * OPSIZ;
319       len -= 8;
320     }
321   while (len != 0);
322 
323   /* This is the right position for do0.  Please don't move
324      it into the loop.  */
325  do0:
326   ((op_t *) dstp)[7] = a1;
327 }
328 
329 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
330    before SRCP to block finishing right before DSTP with LEN `op_t'
331    words (not LEN bytes!).  DSTP should be aligned for memory
332    operations on `op_t', but SRCP must *not* be aligned.  */
333 
334 #ifndef WORDCOPY_BWD_DEST_ALIGNED
335 # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned
336 #endif
337 
338 void
WORDCOPY_BWD_DEST_ALIGNED(long int dstp,long int srcp,size_t len)339 WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
340 {
341   op_t a0, a1, a2, a3;
342   int sh_1, sh_2;
343 
344   /* Calculate how to shift a word read at the memory operation
345      aligned srcp to make it aligned for copy.  */
346 
347   sh_1 = 8 * (srcp % OPSIZ);
348   sh_2 = 8 * OPSIZ - sh_1;
349 
350   /* Make srcp aligned by rounding it down to the beginning of the op_t
351      it points in the middle of.  */
352   srcp &= -OPSIZ;
353   srcp += OPSIZ;
354 
355   switch (len % 4)
356     {
357     case 2:
358       srcp -= 3 * OPSIZ;
359       dstp -= 1 * OPSIZ;
360       a2 = ((op_t *) srcp)[2];
361       a1 = ((op_t *) srcp)[1];
362       len += 2;
363       goto do1;
364     case 3:
365       srcp -= 4 * OPSIZ;
366       dstp -= 2 * OPSIZ;
367       a3 = ((op_t *) srcp)[3];
368       a2 = ((op_t *) srcp)[2];
369       len += 1;
370       goto do2;
371     case 0:
372       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
373 	return;
374       srcp -= 5 * OPSIZ;
375       dstp -= 3 * OPSIZ;
376       a0 = ((op_t *) srcp)[4];
377       a3 = ((op_t *) srcp)[3];
378       goto do3;
379     case 1:
380       srcp -= 6 * OPSIZ;
381       dstp -= 4 * OPSIZ;
382       a1 = ((op_t *) srcp)[5];
383       a0 = ((op_t *) srcp)[4];
384       len -= 1;
385       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
386 	goto do0;
387       goto do4;			/* No-op.  */
388     }
389 
390   do
391     {
392     do4:
393       a3 = ((op_t *) srcp)[3];
394       ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
395     do3:
396       a2 = ((op_t *) srcp)[2];
397       ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
398     do2:
399       a1 = ((op_t *) srcp)[1];
400       ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
401     do1:
402       a0 = ((op_t *) srcp)[0];
403       ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
404 
405       srcp -= 4 * OPSIZ;
406       dstp -= 4 * OPSIZ;
407       len -= 4;
408     }
409   while (len != 0);
410 
411   /* This is the right position for do0.  Please don't move
412      it into the loop.  */
413  do0:
414   ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
415 }
416